
    i'R                        d Z ddlZddlmZmZmZmZ ddlmZ ddddddd	d
dZ	de
de
fdZ	 d1de
de
de
dedee
eee
         ee
         f         f
dZde
deeeef                  de
de
fdZde
de
deeeef                  fdZde
de
deeeef                  fdZde
de
deeeef                  fdZde
de
deeeef                  fdZde
de
deeeef                  fdZde
de
deeeef                  fdZde
dee         fdZd ee         d!eeeef                  deeeef                  fd"Zde
de
deeeef                  fd#Zde
de
deeeef                  fd$Zde
de
deeeef                  fd%Zd&ee
         d'ed(ed)edeeef         f
d*Zde
d&ee
         d+ee
         de
d,e
deeeef                  fd-Zde
d.e
d/eeeef                  deeeef                  fd0ZdS )2aP  
Fuzzy Matching Module for File Operations

Implements a multi-strategy matching chain to robustly find and replace text,
accommodating variations in whitespace, indentation, and escaping common
in LLM-generated code.

The 8-strategy chain (inspired by OpenCode), tried in order:
1. Exact match - Direct string comparison
2. Line-trimmed - Strip leading/trailing whitespace per line
3. Whitespace normalized - Collapse multiple spaces/tabs to single space
4. Indentation flexible - Ignore indentation differences entirely
5. Escape normalized - Convert \n literals to actual newlines
6. Trimmed boundary - Trim first/last line whitespace only
7. Block anchor - Match first+last lines, use similarity for middle
8. Context-aware - 50% line similarity threshold

Multi-occurrence matching is handled via the replace_all flag.

Usage:
    from tools.fuzzy_match import fuzzy_find_and_replace
    
    new_content, match_count, strategy, error = fuzzy_find_and_replace(
        content="def foo():\n    pass",
        old_string="def foo():",
        new_string="def bar():",
        replace_all=False
    )
    N)TupleOptionalListCallable)SequenceMatcher"'z---z... )u   “u   ”u   ‘u   ’u   —u   –u   …    textreturnc                 p    t                                           D ]\  }}|                     ||          } | S )zBNormalizes Unicode characters to their standard ASCII equivalents.)UNICODE_MAPitemsreplace)r   charrepls      9/home/agentuser/.hermes/hermes-agent/tools/fuzzy_match.py_unicode_normalizer   +   s;    !'')) ( (
d||D$''K    Fcontent
old_string
new_stringreplace_allc           
         |s| dddfS ||k    r| dddfS dt           fdt          fdt          fdt          fd	t          fd
t
          fdt          fdt          fdt          fg	}|D ]g\  }} || |          }|rTt          |          dk    r|s| dddt          |           dfc S t          | ||          }|t          |          |dfc S h| dddfS )a)  
    Find and replace text using a chain of increasingly fuzzy matching strategies.

    Args:
        content: The file content to search in
        old_string: The text to find
        new_string: The replacement text
        replace_all: If True, replace all occurrences; if False, require uniqueness

    Returns:
        Tuple of (new_content, match_count, strategy_name, error_message)
        - If successful: (modified_content, number_of_replacements, strategy_used, None)
        - If failed: (original_content, 0, None, error_description)
    r   Nzold_string cannot be emptyz'old_string and new_string are identicalexactline_trimmedwhitespace_normalizedindentation_flexibleescape_normalizedtrimmed_boundaryunicode_normalizedblock_anchorcontext_aware   zFound zY matches for old_string. Provide more context to make it unique, or use replace_all=True.z1Could not find a match for old_string in the file)_strategy_exact_strategy_line_trimmed_strategy_whitespace_normalized_strategy_indentation_flexible_strategy_escape_normalized_strategy_trimmed_boundary_strategy_unicode_normalized_strategy_block_anchor_strategy_context_awarelen_apply_replacements)	r   r   r   r   
strategiesstrategy_namestrategy_fnmatchesnew_contents	            r   fuzzy_find_and_replacer7   2   sY      >4!===Z4!JJJ 
/"	/0	 "AB	!?@	9:	78	;<	/0	12
.J '1 B B"{+gz22 
	B7||a4XS\\ X X X    .gw
KKKGmTAAAA
	B AtPPPr   r5   c                 p    t          |d d          }| }|D ]\  }}|d|         |z   ||d         z   }|S )a  
    Apply replacements at the given positions.
    
    Args:
        content: Original content
        matches: List of (start, end) positions to replace
        new_string: Replacement text
    
    Returns:
        Content with replacements applied
    c                     | d         S )Nr    )xs    r   <lambda>z%_apply_replacements.<locals>.<lambda>v   s
    1Q4 r   T)keyreverseN)sorted)r   r5   r   sorted_matchesresultstartends          r   r1   r1   h   sX     GFFFNF$ < <
s*,vcdd|;Mr   patternc                     g }d}	 |                      ||          }|dk    rn-|                    ||t          |          z   f           |dz   }J|S )zStrategy 1: Exact string match.r   Tr&   )findappendr0   )r   rD   r5   rB   poss        r   r'   r'      sg    GEll7E**"99S3w<</0111a Nr   c                     d |                     d          D             }d                    |          }|                      d          }d |D             }t          | ||||          S )z
    Strategy 2: Match with line-by-line whitespace trimming.
    
    Strips leading/trailing whitespace from each line before matching.
    c                 6    g | ]}|                                 S r:   strip.0lines     r   
<listcomp>z*_strategy_line_trimmed.<locals>.<listcomp>   s     BBBdTZZ\\BBBr   
c                 6    g | ]}|                                 S r:   rL   rN   s     r   rQ   z*_strategy_line_trimmed.<locals>.<listcomp>   s     GGG

GGGr   )splitjoin_find_normalized_matches)r   rD   pattern_linespattern_normalizedcontent_linescontent_normalized_liness         r   r(   r(      s}     CBgmmD.A.ABBBM=11MM$''MGGGGG $ 8#  r   c                 ~    d } ||          } ||           }t          ||          }|sg S t          | ||          S )zC
    Strategy 3: Collapse multiple whitespace to single space.
    c                 .    t          j        dd|           S )Nz[ \t]+r   )resubss    r   	normalizez2_strategy_whitespace_normalized.<locals>.normalize   s    via(((r   )r'   _map_normalized_positions)r   rD   ra   rX   content_normalizedmatches_in_normalizeds         r   r)   r)      sj    ) ) ) #7++"7++ ,,>@RSS  	 %W.@BWXXXr   c           	          |                      d          }d |D             }d |                     d          D             }t          | |||d                    |                    S )z
    Strategy 4: Ignore indentation differences entirely.
    
    Strips all leading whitespace from lines before matching.
    rR   c                 6    g | ]}|                                 S r:   lstriprN   s     r   rQ   z2_strategy_indentation_flexible.<locals>.<listcomp>   s     FFFdkkmmFFFr   c                 6    g | ]}|                                 S r:   rg   rN   s     r   rQ   z2_strategy_indentation_flexible.<locals>.<listcomp>   s     CCCtT[[]]CCCr   )rT   rV   rU   )r   rD   rY   content_stripped_linesrW   s        r   r*   r*      ss     MM$''MFFFFFCCw}}T/B/BCCCM# 6=))  r   c                 N    d } ||          }||k    rg S t          | |          S )zt
    Strategy 5: Convert escape sequences to actual characters.
    
    Handles \n -> newline, \t -> tab, etc.
    c                 ~    |                      dd                               dd                               dd          S )Nz\nrR   z\t	z\r)r   r_   s    r   unescapez-_strategy_escape_normalized.<locals>.unescape   s6    yy%%--eT::BB5$OOOr   )r'   )r   rD   ro   pattern_unescapeds       r   r+   r+      sG    P P P !))G##	7$5666r   c           	         |                     d          }|sg S |d                                         |d<   t          |          dk    r|d                                         |d<   d                    |          }|                      d          }g }t          |          }t	          t          |          |z
  dz             D ]}||||z            }|                                }	|	d                                         |	d<   t          |	          dk    r|	d                                         |	d<   d                    |	          |k    r<t          ||||z   t          |                     \  }
}|                    |
|f           |S )z
    Strategy 6: Trim whitespace from first and last lines only.
    
    Useful when the pattern boundaries have whitespace differences.
    rR   r   r&   rF   )rT   rM   r0   rU   rangecopy_calculate_line_positionsrH   )r   rD   rW   modified_patternrY   r5   pattern_line_countiblock_linescheck_lines	start_posend_poss               r   r,   r,      s    MM$''M 	 %Q'--//M!
=A)"-3355byy//MM$''M G]++3}%%(::Q>?? 1 1#Aa*<&<$<= "&&(($Q--//A{a)"o3355KO99[!!%555!:q!&8"8#g,," "Iw NNIw/000Nr   originalc                     g }d}| D ]G}|                     |           t                              |          }||t          |          ndz  }H|                     |           |S )u  Build a list mapping each original character index to its normalized index.

    Because UNICODE_MAP replacements may expand characters (e.g. em-dash → '--',
    ellipsis → '...'), the normalised string can be longer than the original.
    This map lets us convert positions in the normalised string back to the
    corresponding positions in the original string.

    Returns a list of length ``len(original) + 1``; entry ``i`` is the
    normalised index that character ``i`` maps to.
    r   Nr&   )rH   r   getr0   )r|   rA   norm_posr   r   s        r   _build_orig_to_norm_mapr     sr     FH 9 9ht$$!1CIIIq8
MM(Mr   orig_to_normnorm_matchesc                 ,   i }t          | dd                   D ]\  }}||vr|||<   g }t          |           dz
  }|D ]T\  }}||vr
||         }	|	}
|
|k     r#| |
         |k     r|
dz  }
|
|k     r| |
         |k     |                    |	|
f           U|S )zNConvert (start, end) positions in the normalised string to original positions.NrF   r&   )	enumerater0   rH   )r   r   norm_to_orig_startorig_posr   resultsorig_len
norm_startnorm_end
orig_startorig_ends              r   _map_positions_norm_to_origr     s     *,'SbS(9:: 4 4(---+3x(%'G<  1$H , 
/ 
/
H///'
3
 !!l8&<x&G&GMH !!l8&<x&G&G 	
H-....Nr   c                     t          |          }t          |           }|| k    r||k    rg S t          ||          }|st          ||          }|sg S t          |           }t	          ||          S )u  Strategy 7: Unicode normalisation.

    Normalises smart quotes, em/en-dashes, ellipsis, and non-breaking spaces
    to their ASCII equivalents in both *content* and *pattern*, then runs
    exact and line_trimmed matching on the normalised copies.

    Positions are mapped back to the *original* string via
    ``_build_orig_to_norm_map`` — necessary because some UNICODE_MAP
    replacements expand a single character into multiple ASCII characters,
    making a naïve position copy incorrect.
    )r   r'   r(   r   r   )r   rD   norm_patternnorm_contentr   r   s         r   r-   r-   8  s     &g..L%g..Lw<7#:#:	"<>>L J-lLII 	*733L&|\BBBr   c           	         t          |          }t          |           }|                    d          }t          |          dk     rg S |d                                         }|d                                         }|                    d          }|                     d          }t          |          }	g }
t	          t          |          |	z
  dz             D ]Y}||                                         |k    r9|||	z   dz
                                           |k    r|
                    |           Zg }t          |
          }|dk    rdnd}|
D ]}|	dk    rd}nfd                    ||dz   ||	z   dz
                     }d                    |dd                   }t          d	||                                          }||k    r<t          ||||	z   t          |                     \  }}|                    ||f           |S )
z
    Strategy 8: Match by anchoring on first and last lines.
    Adjusted with permissive thresholds and unicode normalization.
    rR      r   rF   r&         ?gffffff?g      ?N)
r   rT   r0   rM   rr   rH   rU   r   ratiort   )r   rD   r   r   rW   
first_line	last_linenorm_content_linesorig_content_linesrv   potential_matchesrw   r5   candidate_count	threshold
similaritycontent_middlepattern_middlerz   r{   s                       r   r.   r.   W  s5    &g..L%g..L &&t,,M
=A	q!''))Jb!''))I &++D11 t,,]++3)**-??!CDD ( (q!''))Z77q#559:@@BBiOO$$Q'''G+,,O
 (1,,$I 1 1""JJ "YY'9!A#a@R>RST>T:T'UVVN!YY}QrT':;;N(~~NNTTVVJ""!:"Aq+='=s7||" "Iw NNIw/000Nr   c           	      `   |                     d          }|                      d          }|sg S g }t          |          }t          t          |          |z
  dz             D ]}||||z            }d}t          ||          D ]W\  }	}
t	          d|	                                |
                                                                          }|dk    r|dz  }X|t          |          dz  k    r<t          ||||z   t          |                     \  }}|                    ||f           |S )z
    Strategy 9: Line-by-line similarity with 50% threshold.
    
    Finds blocks where at least 50% of lines have high similarity.
    rR   r&   r   Ng?r   )	rT   r0   rr   zipr   rM   r   rt   rH   )r   rD   rW   rY   r5   rv   rw   rx   high_similarity_countp_linec_linesimrz   r{   s                 r   r/   r/     sN    MM$''MMM$''M 	G]++3}%%(::Q>?? 1 1#Aa*<&<$<= !"!-== 	+ 	+NFF!$GGMMOOCd{{%*% !C$6$6$<<<!:q!&8"8#g,," "Iw NNIw/000Nr   rY   
start_lineend_linecontent_lengthc                     t          d | d|         D                       }t          d | d|         D                       dz
  }||k    r|}||fS )a  Calculate start and end character positions from line indices.

    Args:
        content_lines: List of lines (without newlines)
        start_line: Starting line index (0-based)
        end_line: Ending line index (exclusive, 0-based)
        content_length: Total length of the original content string

    Returns:
        Tuple of (start_pos, end_pos) in the original content
    c              3   :   K   | ]}t          |          d z   V  dS r&   Nr0   rN   s     r   	<genexpr>z,_calculate_line_positions.<locals>.<genexpr>  s,      IIdCIIMIIIIIIr   Nc              3   :   K   | ]}t          |          d z   V  dS r   r   rN   s     r   r   z,_calculate_line_positions.<locals>.<genexpr>  s,      EED#d))a-EEEEEEr   r&   )sum)rY   r   r   r   rz   r{   s         r   rt   rt     sn     IImKZK.HIIIIIIEEM)8),DEEEEEIG.   gr   rZ   rX   c           	      `   |                     d          }t          |          }g }t          t          |          |z
  dz             D ]d}d                    ||||z                      }	|	|k    r<t	          ||||z   t          |                     \  }
}|                    |
|f           e|S )a  
    Find matches in normalized content and map back to original positions.
    
    Args:
        content: Original content string
        content_lines: Original content split by lines
        content_normalized_lines: Normalized content lines
        pattern: Original pattern
        pattern_normalized: Normalized pattern
    
    Returns:
        List of (start, end) positions in the original content
    rR   r&   )rT   r0   rr   rU   rt   rH   )r   rY   rZ   rD   rX   pattern_norm_linesnum_pattern_linesr5   rw   blockrz   r{   s               r   rV   rV     s      ,11$77.//G3/003DDqHII 	1 	1		21Q9J5J3JKLL&&&!:q!&7"7W" "Iw NNIw/000Nr   
normalizednormalized_matchesc           
         |sg S g }d}d}|t          |           k     r|t          |          k     r| |         ||         k    r |                    |           |dz  }|dz  }n| |         dv rI||         dk    r=|                    |           |dz  }|t          |           k     r| |         dvr|dz  }n?| |         dv r|                    |           |dz  }n|                    |           |dz  }|t          |           k     r|t          |          k     |t          |           k     r:|                    t          |                     |dz  }|t          |           k     :i }i }t          |          D ]\  }}	|	|vr|||	<   |||	<   g }
|D ]\  }|v r	|         }n(t          fdt          |          D                       }|dz
  |v r||dz
           dz   }n||z
  z   }|t          |           k     r,| |         dv r"|dz  }|t          |           k     r
| |         dv "|
                    |t          |t          |                     f           |
S )z
    Map positions from normalized string back to original.
    
    This is a best-effort mapping that works for whitespace normalization.
    r   r&   z 	r   c              3   .   K   | ]\  }}|k    |V  d S )Nr:   )rO   rw   nr   s      r   r   z,_map_normalized_positions.<locals>.<genexpr>(  s+      VV41aa:ooQooooVVr   )r0   rH   r   min)r|   r   r   r   orig_idxnorm_idxr   norm_to_orig_endr   r   original_matchesr   r   r   r   s                 @r   rb   rb     s	     	 LHH
S]]
"
"x#j//'A'AHH!555)))MHMHHh5((Z-AS-H-H)))MH#h--''HX,>e,K,KAh5(()))MHH )))MH' S]]
"
"x#j//'A'A, S]]
"
"C
OO,,,A S]]
"
"
 '55 . .(---+3x(%-""  2 L L
H++++J7JJ VVVV9\+B+BVVVVVJ a<+++'159HH!X
%:;H X&&8H+=+F+FMH X&&8H+=+F+F 	S3x==-I-I JKKKKr   )F)__doc__r]   typingr   r   r   r   difflibr   r   strr   boolintr7   r1   r'   r(   r)   r*   r+   r,   r   r   r-   r.   r/   rt   rV   rb   r:   r   r   <module>r      s   < 
			 2 2 2 2 2 2 2 2 2 2 2 2 # # # # # # SScs	 S S     163Q 3QC 3QS 3Qc 3Q)-3Q:?S(SV-YabeYf@f:g3Q 3Q 3Q 3Ql tE#s(O/D RU Z]    6
S 
3 
4c3h3H 
 
 
 
C # $uS#X:O    (YS Y3 Y4cSVhCX Y Y Y Y*C # $uSRUXBW     7 7s 7tE#s(O?T 7 7 7 7&' 'c 'd5c?>S ' ' ' 'Tc d3i    *s)uS#X' 
%S/   :C# C CU3PS8_@U C C C C>5C 5# 5$uS#X:O 5 5 5 5p S  3  4c3h;P        NT#Y C (+=@EJ3PS8_   ( c  $s)  8<S	 '* @C HLUSVX[S[_H]       FI I I37c3h3HIMQRWX[]`X`RaMbI I I I I Ir   