@@ -87,14 +87,83 @@ pub fn create_blanked_content(content: &str, diff: f64) -> (String, HashMap<usiz
8787 let mut blanked_content = content. chars ( ) . collect :: < Vec < char > > ( ) ;
8888 let mut original_chars = HashMap :: new ( ) ;
8989
90- // Only replace alphanumeric and some special characters
90+ // First identify character positions to protect (comments, function signatures, etc.)
91+ let mut protect_indices = Vec :: new ( ) ;
92+ let lines: Vec < & str > = content. lines ( ) . collect ( ) ;
93+ let mut line_start_indices = Vec :: new ( ) ;
94+
95+ // Calculate the starting index of each line
96+ let mut current_idx = 0 ;
97+ for line in & lines {
98+ line_start_indices. push ( current_idx) ;
99+ current_idx += line. len ( ) + 1 ; // +1 for the newline character
100+ }
101+
102+ // Process each line to identify protected regions
103+ for ( line_idx, line) in lines. iter ( ) . enumerate ( ) {
104+ let line_start = line_start_indices[ line_idx] ;
105+ let trimmed = line. trim ( ) ;
106+
107+ // Protect entire lines for these cases:
108+
109+ // 1. Comments
110+ if trimmed. starts_with ( '#' ) || trimmed. starts_with ( "//" ) || trimmed. starts_with ( "/*" ) {
111+ protect_indices. extend ( line_start..line_start + line. len ( ) ) ;
112+ continue ;
113+ }
114+
115+ // 2. Function/method definitions
116+ if trimmed. starts_with ( "def " ) || trimmed. starts_with ( "function " ) ||
117+ trimmed. starts_with ( "class " ) || trimmed. starts_with ( "struct " ) ||
118+ trimmed. starts_with ( "pub fn " ) || trimmed. starts_with ( "fn " ) {
119+ protect_indices. extend ( line_start..line_start + line. len ( ) ) ;
120+ continue ;
121+ }
122+
123+ // 3. Function signatures with parameters and return types
124+ if ( trimmed. contains ( "(" ) && trimmed. contains ( ")" ) ) &&
125+ ( trimmed. contains ( " -> " ) || trimmed. contains ( ") {" ) || trimmed. contains ( ") =>" ) ) {
126+ protect_indices. extend ( line_start..line_start + line. len ( ) ) ;
127+ continue ;
128+ }
129+
130+ // 4. Parameter declarations with default values
131+ if trimmed. contains ( " = " ) && ( trimmed. contains ( "(" ) || trimmed. contains ( ")" ) || trimmed. contains ( "," ) ) {
132+ protect_indices. extend ( line_start..line_start + line. len ( ) ) ;
133+ continue ;
134+ }
135+
136+ // 5. Import statements/includes
137+ if trimmed. starts_with ( "import " ) || trimmed. starts_with ( "from " ) ||
138+ trimmed. starts_with ( "using " ) || trimmed. starts_with ( "include " ) ||
139+ trimmed. starts_with ( "require " ) || trimmed. starts_with ( "use " ) {
140+ protect_indices. extend ( line_start..line_start + line. len ( ) ) ;
141+ continue ;
142+ }
143+ }
144+
145+ // Only replace alphanumeric and some special characters that aren't protected
91146 let replaceable_indices: Vec < usize > = blanked_content. iter ( ) . enumerate ( )
92- . filter ( |( _, c) | c. is_alphanumeric ( ) || * * c == '=' || * * c == '+' || * * c == '-' || * * c == '*' || * * c == '/' || * * c == '%' )
147+ . filter ( |( i, c) | {
148+ // The character should be replaceable AND not in a protected region
149+ ( c. is_alphanumeric ( ) || * * c == '=' || * * c == '+' || * * c == '-' ||
150+ * * c == '*' || * * c == '/' || * * c == '%' ) &&
151+ !protect_indices. contains ( i)
152+ } )
93153 . map ( |( i, _) | i)
94154 . collect ( ) ;
95155
96- let num_blanks = ( replaceable_indices. len ( ) as f64 * diff) as usize ;
97- let indices_to_blank: Vec < usize > = replaceable_indices. choose_multiple ( & mut rng, num_blanks) . cloned ( ) . collect ( ) ;
156+ // Calculate how many blanks to create (but ensure we have enough replaceable items)
157+ let num_replaceable = replaceable_indices. len ( ) ;
158+ if num_replaceable == 0 {
159+ return ( content. to_string ( ) , original_chars) ; // Nothing to blank
160+ }
161+
162+ let num_blanks = ( num_replaceable as f64 * diff) as usize ;
163+
164+ // Avoid trying to choose more items than available
165+ let actual_blanks = std:: cmp:: min ( num_blanks, num_replaceable) ;
166+ let indices_to_blank: Vec < usize > = replaceable_indices. choose_multiple ( & mut rng, actual_blanks) . cloned ( ) . collect ( ) ;
98167
99168 for & idx in & indices_to_blank {
100169 original_chars. insert ( idx, blanked_content[ idx] ) ;
0 commit comments