
    im                        U d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlZddlZddlmZ  ej        e          Z eh d          Zee         ed<    ej        d	ej                  Zd
edefdZi aeeeeef         f         ed<   daeed<   dZi Z eeeeeeef         f         f         ed<   i Z!eeef         ed<   dZ"g dZ#e#d         Z$dZ%i ddddddddddddddddd dd!d"d#d$d%d$d&dd'd(d)d$d*d+d,d-i d.d-d/d0d1d2d3d$d4d0d5dd6d7d8d0d9d:d;d<d=d-d>d?d@d2dAd?dBd?dCd-dDd0i dEd0dFd0dGd7dHd7dId7dJd0dKd0dLdMdNd7dOd7dPd:dQd-dRddSd-dTd-dUd<Z&dVZ'dWZ(dXZ)dYZ*dZedefd[Z+dZede,fd\Z-dZede,fd]Z.i d^d_d`d_dadbdcdddedfdgdhdidfdjdkdld9dmdndodndpdqdrdsdtd*dudvdwd3dxdydydzd{d|d}d}d~dZ/eeef         ed<   dZede
e         fdZ0dZede,fdZ1dZede,fdZ2dZede
e         fdZ3defdZ4ddede5de5de
e5         fdZ6deeef         de7edf         de
e5         fdZ8deeef         de
e5         fdZ9deeef         de
e5         fdZ:deeef         deeef         fdZ;deeeeef         f         dedeeef         ddfdZ<dde,deeeeef         f         fdZ=	 	 ddZedede,deeeeef         f         fdZ>defdZ?deee5f         fdZ@d
edZede5ddfdZAd
edZede
e5         fdZBde5de
e5         fdZCdede
e5         fdZDdede
e5         fdZEdedede,fdZFd
edZede
e5         fdZGd
edZede
e5         fdZHd
edefdZId
edZedede
e5         fdZJd
ede
e5         fdZK	 	 	 	 dd
edZedede5dz  dede5fdZLdede5fdZMde	eeef                  de5fdZNdddde	eeef                  dede
e	eeef                           de5fdZOdS )zModel metadata, context lengths, and token estimation utilities.

Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
    N)Path)AnyDictListOptional)urlparse)OPENROUTER_MODELS_URL>:   x-aix.aiz-aiz.aikimi-cnarcee-ai	deep-seek
ai-gateway
minimax-cncopilot-acpmoonshot-cnqwen-portalxiaomi-mimoopenai-codexopencode-zengithub-modelsgoogle-geminigithub-copilotgoogle-ai-studiogoglmxaizaizengrokkilokimimimonousqwenarceelocalzhipualiyunclaudecustomgeminigithubgoogleollamavercelxiaomialibabaarceeaicopilotminimaxdeepseekkilocodemoonshotopencode	anthropic	dashscope
openrouter
qwen-oauthkimi-codingopencode-goollama-cloudkimi-coding-cn_PROVIDER_PREFIXESzE^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)modelreturnc                 ,   d| vs|                      d          r| S |                     dd          \  }}|                                                                }|t          v r0t
                              |                                          r| S |S | S )ua  Strip a recognised provider prefix from a model string.

    ``"local:my-model"`` → ``"my-model"``
    ``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
    ``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
    :http   )
startswithsplitstriplowerrD   _OLLAMA_TAG_PATTERNmatch)rE   prefixsuffixprefix_lowers       </home/agentuser/.hermes/hermes-agent/agent/model_metadata.py_strip_provider_prefixrU   3   s     %5++F33[[a((NFF<<>>''))L)))$$V\\^^44 	LL    _model_metadata_cache_model_metadata_cache_timei  _endpoint_model_metadata_cache#_endpoint_model_metadata_cache_timei,  )     i }  i>  i@  r\   zclaude-opus-4-7i@B zclaude-opus-4.7zclaude-opus-4-6zclaude-sonnet-4-6zclaude-opus-4.6zclaude-sonnet-4.6r,   i@ zgpt-5.4-nanoi zgpt-5.4-minizgpt-5.4i zgpt-5.3-codex-sparkr[   zgpt-5.1-chatzgpt-5zgpt-4.1i zgpt-4r.   i   zgemma-4-31bi  zgemma-4-26bzgemma-3i   gemmai    r8   llamazqwen3-coder-pluszqwen3-coderi   r'   r7   i   r   i  zgrok-code-fastzgrok-4-1-fasti zgrok-2-visionzgrok-4-fastz	grok-4.20zgrok-4zgrok-3zgrok-2r"   r$   trinityelephantzQwen/Qwen3.5-397B-A17BzQwen/Qwen3.5-35B-A3Bzdeepseek-ai/DeepSeek-V3.2i   zmoonshotai/Kimi-K2.5zmoonshotai/Kimi-K2-ThinkingzMiniMaxAI/MiniMax-M2.5zXiaomiMiMo/MiMo-V2-Flashzmimo-v2-prozmimo-v2-omnizmimo-v2-flashzzai-org/GLM-5)
context_lengthcontext_windowmax_context_lengthmax_position_embeddingsmax_model_lenmax_input_tokensmax_sequence_lengthmax_seq_lenn_ctx_trainn_ctx)max_completion_tokensmax_output_tokens
max_tokens)	localhostz	127.0.0.1z::1z0.0.0.0)z.docker.internalz.containers.internalz.lima.internalbase_urlc                 T    | pd                                                     d          S )N /)rM   rstripro   s    rT   _normalize_base_urlru      s&    N!!##**3///rV   c                 H    dt          |                                           v S )Nopenrouter.ai)ru   rN   rt   s    rT   _is_openrouter_base_urlrx      s"    1(;;AACCCCrV   c                 ^    t          |           }t          |          ot          |           S N)ru   boolrx   )ro   
normalizeds     rT   _is_custom_endpointr}      s.    $X..J
G$;J$G$G GGrV   zapi.openai.comopenaizchatgpt.comapi.anthropic.comr<   zapi.z.air    zapi.moonshot.air@   zapi.moonshot.cnrC   zapi.kimi.comzapi.arcee.air(   zapi.minimaxzdashscope.aliyuncs.comr4   zdashscope-intl.aliyuncs.comzportal.qwen.air?   rw   r>   z!generativelanguage.googleapis.comzinference-api.nousresearch.comr&   zapi.deepseek.comzapi.githubcopilot.comr6   	fireworksrA   r   r3   rB   )zmodels.github.aizapi.fireworks.aizopencode.aizapi.x.aizapi.xiaomimimo.comzxiaomimimo.comz
ollama.com_URL_TO_PROVIDERc                    t          |           }|sdS t          d|v r|nd|           }|j                                        p|j                                        }t
                                          D ]\  }}||v r|c S dS )a  Infer the models.dev provider name from a base URL.

    This allows context length resolution via models.dev for custom endpoints
    like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
    explicitly set the provider name in config.
    N://zhttps://)ru   r   netlocrN   pathr   items)ro   r|   parsedhosturl_partproviders         rT   _infer_provider_from_urlr      s     %X..J tEZ$7$7jj=T
=T=TUUF=  7FK$5$5$7$7D.4466  (tOOO 4rV   c                 $    t          |           d uS rz   )r   rt   s    rT   _is_known_provider_base_urlr     s    #H--T99rV   c                   	 t          |           }|sdS d|v r|nd| }	 t          |          }|j        pd	n# t          $ r Y dS w xY w	t          v rdS t          	fdt          D                       rdS ddl}	 |                    	          }|j	        p|j
        p|j        S # t          $ r Y nw xY w	                    d	          }t          |          d
k    rj	 t          |d                   t          |d                   }}|dk    rdS |dk    rd|cxk    rdk    rn ndS |dk    r|dk    rdS n# t          $ r Y nw xY wdS )zOReturn True if base_url points to a local machine (localhost / RFC-1918 / WSL).Fr   zhttp://rq   Tc              3   B   K   | ]}                     |          V  d S rz   )endswith).0rR   r   s     rT   	<genexpr>z$is_local_endpoint.<locals>.<genexpr>  s/      
I
IV4==  
I
I
I
I
I
IrV   r   N.   rJ   
                  )ru   r   hostname	Exception_LOCAL_HOSTSany_CONTAINER_LOCAL_SUFFIXES	ipaddress
ip_address
is_privateis_loopbackis_link_local
ValueErrorrL   lenint)
ro   r|   urlr   r   addrpartsfirstsecondr   s
            @rT   is_local_endpointr     s   $X..J u++**1G:1G1GC#$"   uu|t

I
I
I
I/H
I
I
III t##D))H$"2Hd6HH    JJsOOE
5zzQ		aMM3uQx==6E{{t||f 2 2 2 2 2 2 2 2 2t||#t 	 	 	D	5s?   : 
AA=)B' '
B43B4 0D9 D9 *D9 9
EEc                    ddl }t          |           }|}|                    d          r
|dd         }	 |                    d          5 }	 |                    | d          }|j        dk    r	 ddd           d	S n# t          $ r Y nw xY w	 |                    | d
          }|j        dk    r8	 |                                }d|v r	 ddd           dS n# t          $ r Y nw xY wn# t          $ r Y nw xY w	 |                    | d          }|j        dk    r|                    | d          }|j        dk    rd|j        v r	 ddd           dS n# t          $ r Y nw xY w	 |                    | d          }|j        dk    r&|                                }d|v r	 ddd           dS n# t          $ r Y nw xY wddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )zDetect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
    r   N/v1g       @timeout/api/v1/models   	lm-studioz	/api/tagsmodelsr1   	/v1/props/propsdefault_generation_settingsllamacppz/versionversionvllm)	httpxru   r   Clientgetstatus_coder   jsontext)ro   r   r|   
server_urlclientrdatas          rT   detect_local_server_typer   6  sQ   
 LLL$X..JJ5!! %_
*\\#\&& '	&JJ*<<<===C''&'	 '	 '	 '	 '	 '	 '	 '	 (   

JJ*77788=C'' vvxx#t++#+#'	 '	 '	 '	 '	 '	 '	 '	  ,$      JJ*77788=C''

j#8#8#899A=C'',IQV,S,S%9'	 '	 '	 '	 '	 '	 '	 '	:    JJ*66677=C''6688D D((%K'	 '	 '	 '	 '	 '	 '	 '	L    M'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	P     4s  G G#B 2G ?G 
B
GBG#C-5CG C-
C)&C-(C))C-,G-
C:7G9C::G>AEG G
E)&G(E))G-;F7)G 6G7
GGGGG GG GG 
G,+G,valuec              #      K   t          | t                    r2| V  |                                 D ]}t          |          E d {V  d S t          | t                    r| D ]}t          |          E d {V  d S d S rz   )
isinstancedictvalues_iter_nested_dictslist)r   nesteditems      rT   r   r   q  s      % 0llnn 	2 	2F)&1111111111	2 	2	E4	 	  0 	0 	0D)$//////////0 0	0 	0rV      逖 minimummaximumc                 $   	 t          | t                    rd S t          | t                    r(|                                                     dd          } t          |           }n# t          t          f$ r Y d S w xY w||cxk    r|k    rn n|S d S )N,rq   )r   r{   strrM   replacer   	TypeErrorr   )r   r   r   results       rT   _coerce_reasonable_intr   {  s    eT"" 	4eS!! 	3KKMM))#r22EUz"   tt&####G#####4s   A& AA& &A;:A;payloadkeys.c                     d |D             }t          |           D ]W}|                                D ]@\  }}t          |                                          |vr)t	          |          }||c c S AXd S )Nc                 6    h | ]}|                                 S  )rN   )r   keys     rT   	<setcomp>z%_extract_first_int.<locals>.<setcomp>  s     ***cciikk***rV   )r   r   r   rN   r   )r   r   keysetmappingr   r   coerceds          rT   _extract_first_intr     s    **T***F%g..  !--// 	 	JC3xx~~v--,U33G" #		 4rV   c                 ,    t          | t                    S rz   )r   _CONTEXT_LENGTH_KEYSr   s    rT   _extract_context_lengthr         g';<<<rV   c                 ,    t          | t                    S rz   )r   _MAX_COMPLETION_KEYSr   s    rT   _extract_max_completion_tokensr     r   rV   c                 X   dddddd}t          |           D ]}d |                                D             t          fd|                                D                       sNi }|                                D ]%\  }}|D ]}|v r|         d	vr|         ||<    n&|r|c S i S )
N)promptinputinput_cost_per_tokenprompt_token_cost)
completionoutputoutput_cost_per_tokencompletion_token_cost)requestrequest_cost)
cache_readcached_promptinput_cache_readcache_read_cost_per_token)cache_writecache_creationinput_cache_writecache_write_cost_per_token)r   r   r   r   r   c                 X    i | ]'\  }}t          |                                          |(S r   )r   rN   )r   r   r   s      rT   
<dictcomp>z$_extract_pricing.<locals>.<dictcomp>  s.    PPP*#uc#hhnn&&PPPrV   c              3   N   K   | ]}t          fd |D                       V   dS )c              3       K   | ]}|v V  	d S rz   r   )r   aliasr|   s     rT   r   z-_extract_pricing.<locals>.<genexpr>.<genexpr>  s(      @@uu
*@@@@@@rV   N)r   )r   aliasesr|   s     rT   r   z#_extract_pricing.<locals>.<genexpr>  s@      ccW3@@@@@@@@@ccccccrV   )Nrq   )r   r   r   r   )r   	alias_mapr   pricingtargetr  r  r|   s          @rT   _extract_pricingr    s   R`.fk I &g..  PPPPP
ccccPYP`P`PbPbccccc 	"$(00 	 	OFG   J&&:e+<J+N+N&0&7GFOE 	NNN	IrV   cachemodel_identryc                     || |<   d|v r4|                     dd          d         }|                     ||           d S d S )Nrr   rJ   )rL   
setdefault)r  r  r  
bare_models       rT   _add_model_aliasesr    sO    E(O
h^^C++A.
U+++++ rV   Fforce_refreshc                    | s2t           r+t          j                    t          z
  t          k     rt           S 	 t	          j        t          d          }|                                 |                                }i }|                    dg           D ]}|                    dd          }|                    dd          |                    di                               d	d
          |                    d|          |                    di           d}t          |||           |                    dd          }|r||k    rt          |||           |a t          j                    at                              dt          |                     |S # t          $ r*}t          j        d|            t           pi cY d}~S d}~ww xY w)z9Fetch model metadata from OpenRouter (cached for 1 hour).r   r   r   idrq   ra   r[   top_providerrk   i   namer	  )ra   rk   r  r	  canonical_slugz.Fetched metadata for %s models from OpenRouterz0Failed to fetch model metadata from OpenRouter: N)rW   timerX   _MODEL_CACHE_TTLrequestsr   r	   raise_for_statusr   r  loggerdebugr   r   loggingwarning)	r  responser   r  rE   r  r  	canonicales	            rT   fetch_model_metadatar$    s     %2 %	F`8`dt7t7t$$+< 5rBBB!!###}}XXfb)) 	< 	<Eyyr**H"')),<f"E"E).>2)F)F)J)JKbdh)i)i		&(33 99Y33	 E uh666		"2B77I <Y(22"5)U;;; %%)Y[["Es5zzRRR + + +N1NNOOO$*******+s   EF 
GG=GGrq   api_keyc                    t          |           }|rt          |          ri S |sXt                              |          }t                              |d          }|!t          j                    |z
  t          k     r|S |g}|                    d          r|dd                             d          }n|dz   }|r||vr|	                    |           |rdd| ini }d}	|D ]d}
|
                    d          dz   }	 t          j        ||d	
          }|                                 |                                }i }|                    dg           D ]}t          |t                    s|                    d          }|s0d|                    d|          i}t          |          }|||d<   t!          |          }|||d<   t#          |          }|r||d<   t%          |||           t'          d |                    dg           D                       }|r	 |
                    d                              dd          }t          j        |dz   |d
          }|j        st          j        |dz   |d
          }|j        rh|                                }|                    di           }|                    d          }|                    dd          }|r|r||v r|||         d<   n# t,          $ r Y nw xY w|t          |<   t          j                    t          |<   |c S # t,          $ r}|}	Y d}~^d}~ww xY w|	rt.                              d||	           i t          |<   t          j                    t          |<   i S )zFetch model metadata from an OpenAI-compatible ``/models`` endpoint.

    This is used for explicit custom endpoints where hardcoded global model-name
    defaults are unreliable. Results are cached in memory per base URL.
    r   Nr   r   rr   AuthorizationzBearer z/modelsr   headersr   r   r  r  ra   rk   r	  c              3   r   K   | ]2}t          |t                    |                    d           dk    V  3dS )owned_byr   N)r   r   r   )r   ms     rT   r   z0fetch_endpoint_model_metadata.<locals>.<genexpr>  sV        Jq$4G4Gj!!Z/     rV   rq   r      r   r   rj   model_aliasz1Failed to fetch model metadata from %s/models: %s)ru   rx   rY   r   rZ   r  _ENDPOINT_MODEL_CACHE_TTLr   rs   appendr  r  r   r   r   r   r   r  r  r   r   okr   r  r  )ro   r%  r  r|   cached	cached_at
candidates	alternater)  
last_error	candidater   r!  r   r  rE   r  r  ra   rk   r	  is_llamacppbase
props_resppropsgen_settingsrj   r.  excs                                rT   fetch_endpoint_model_metadatar>    s/    %X..J 0<< 	 /33J??7;;JJJ	49;;#:>W"W"WMJ5!! 'ssO**3//		&	 %Yj00)$$$8?G 3' 3 344RG&*J 3 3	s##i/1	|C"EEEH%%'''mmooG/1E VR00 ; ;!%..  99T?? )/681L1L(M!8!?!?!-.<E*+(Fu(M(M%(45JE12*511 /'.E)$"5(E::::    VR00    K  $++C0088CCD!)d[.@'[\!]!]!]J%= _%-\$/7\]%^%^%^
!} I * 1 1',yy1NPR'S'S , 0 0 9 9&+iir&B&B  I[ I[E5I5ICHE+./?@    D :?*:6>Bikk/
;LLL 	 	 	JJJJJJ	  bH*V`aaa13":.6:ikk'
3Is>   >D"L.!CK54L.5
L?L.L)L..
M8M  Mc                  (    ddl m}   |             dz  S )z8Return path to the persistent context length cache file.r   get_hermes_homezcontext_length_cache.yaml)hermes_constantsrA  r@  s    rT   _get_context_cache_pathrC  :  s(    000000?:::rV   c                  Z   t                      } |                                 si S 	 t          |           5 }t          j        |          pi }ddd           n# 1 swxY w Y   |                    di           S # t          $ r'}t                              d|           i cY d}~S d}~ww xY w)z:Load the model+provider -> context_length cache from disk.Ncontext_lengthsz'Failed to load context length cache: %s)	rC  existsopenyaml	safe_loadr   r   r  r  )r   fr   r#  s       rT   _load_context_cacherK  @  s    "$$D;;== 	$ZZ 	+1>!$$*D	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+xx)2...   >BBB						s@   A9 AA9 AA9 A A9 9
B*B%B*%B*lengthc                    |  d| }t                      }|                    |          |k    rdS |||<   t                      }	 |j                            dd           t          |d          5 }t          j        d|i|d           ddd           n# 1 swxY w Y   t          	                    d	||d
           dS # t          $ r&}t                              d|           Y d}~dS d}~ww xY w)zPersist a discovered context length for a model+provider combo.

    Cache key is ``model@base_url`` so the same model name served from
    different providers can have different limits.
    @NT)parentsexist_okwrE  F)default_flow_stylez%Cached context length %s -> %s tokensr   z'Failed to save context length cache: %s)rK  r   rC  parentmkdirrG  rH  dumpr  infor   r  )rE   ro   rL  r   r  r   rJ  r#  s           rT   save_context_lengthrW  N  si    

X

C!!Eyy~~E#J"$$DC$666$__ 	OI(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O;SV--PPPPP C C C>BBBBBBBBBCs<   ,C 1BC BC B!C 
C2C--C2c                 V    |  d| }t                      }|                    |          S )zBLook up a previously discovered context length for model+provider.rN  )rK  r   )rE   ro   r   r  s       rT   get_cached_context_lengthrY  c  s0    

X

C!!E99S>>rV   current_lengthc                 .    t           D ]}|| k     r|c S dS )z@Return the next lower probe tier, or None if already at minimum.N)CONTEXT_PROBE_TIERS)rZ  tiers     rT   get_next_probe_tierr^  j  s/    #  .  KKK !4rV   	error_msgc                     |                                  }g d}|D ]O}t          j        ||          }|r6t          |                    d                    }d|cxk    rdk    rn K|c S PdS )a?  Try to extract the actual context limit from an API error message.

    Many providers include the limit in their error text, e.g.:
      - "maximum context length is 32768 tokens"
      - "context_length_exceeded: 131072"
      - "Maximum context size 32768 exceeded"
      - "model's max context length is 65536"
    )zY(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})z:context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})z)(\d{4,})\s*(?:token)?\s*(?:context|limit)z">\s*(\d{4,})\s*(?:max|limit|token)z(\d{4,})\s*(?:max(?:imum)?)\brJ   r   r   NrN   researchr   group)r_  error_lowerpatternspatternrP   limits         rT   parse_context_limit_from_errorri  r  s     //##K  H   	';// 	A''Eu****
*****4rV   c                     |                                  }d|v od|v pd|v }|sdS g d}|D ]E}t          j        ||          }|r,t          |                    d                    }|dk    r|c S FdS )u#  Detect an "output cap too large" error and return how many output tokens are available.

    Background — two distinct context errors exist:
      1. "Prompt too long"  — the INPUT itself exceeds the context window.
           Fix: compress history and/or halve context_length.
      2. "max_tokens too large" — input is fine, but input + requested_output > window.
           Fix: reduce max_tokens (the output cap) for this call.
           Do NOT touch context_length — the window hasn't shrunk.

    Anthropic's API returns errors like:
      "max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"

    Returns the number of output tokens that would fit (e.g. 10000 above), or None if
    the error does not look like a max_tokens-too-large error.
    rm   available_tokenszavailable tokensN)zavailable_tokens[:\s]+(\d+)zavailable\s+tokens[:\s]+(\d+)z=\s*(\d+)\s*$rJ   ra  )r_  re  is_output_cap_errorrf  rg  rP   tokenss          rT   (parse_available_output_tokens_from_errorrn    s      //##K 	# 	U;.S2D2S   t  H   	';// 	Q((F{{4rV   candidate_idlookup_modelc                 b    | |k    rdS d| v r"|                      dd          d         |k    rdS dS )a  Return True if *candidate_id* (from server) matches *lookup_model* (configured).

    Supports two forms:
    - Exact match:  "nvidia-nemotron-super-49b-v1" == "nvidia-nemotron-super-49b-v1"
    - Slug match:   "nvidia/nvidia-nemotron-super-49b-v1" matches "nvidia-nemotron-super-49b-v1"
                    (the part after the last "/" equals lookup_model)

    This covers LM Studio's native API which stores models as "publisher/slug"
    while users typically configure only the slug after the "local:" prefix.
    Trr   rJ   F)rsplit)ro  rp  s     rT   _model_id_matchesrs    sH     |##t
l|223::1=MMt5rV   c                    ddl }t          |           }|                    d          }|                    d          r
|dd         }	 t	          |          }n# t
          $ r Y dS w xY w|dk    rdS 	 |                    d          5 }|                    | d	d
|i          }|j        dk    r	 ddd           dS |	                                }|
                    dd          }	d|	v r|	                    d          D ]s}
d|
v rm|
                                                                }t          |          dk    r4	 t          |d                   c cddd           S # t          $ r Y ow xY wt|
                    di           }|                                D ]B\  }}d|v r9t#          |t          t$          f          rt          |          c cddd           S C	 ddd           n# 1 swxY w Y   n# t
          $ r Y nw xY wdS )ay  Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
    or the explicit ``num_ctx`` from the Modelfile if set.  Returns None if
    the server is unreachable or not Ollama.

    This is the value that should be passed as ``num_ctx`` in Ollama chat
    requests to override the default 2048.
    r   Nrr   r   r   r1         @r   	/api/showr  r   r   
parametersrq   num_ctx
   
model_infora   )r   rU   rs   r   r   r   r   postr   r   r   rL   rM   r   r   r   r   r   float)rE   ro   r   r  r   server_typer   respr   paramsliner   r}  r   r   s                  rT   query_ollama_num_ctxr    s    LLL'..J%%J5!! %_
.x88   ttht\\#\&& 	&&;;*777vz>R;SSD3&&	& 	& 	& 	& 	& 	& 	& 	& 99;;D XXlB//FF"""LL.. % %D D(( $

 2 2 4 4u::??%'*59~~ 5 5	& 	& 	& 	& 	& 	& 	& 	& $. % % % $% ,33J(..00 & &
U#s**z%#u/N/N*u::%%-	& 	& 	& 	& 	& 	& 	& 	&(&)	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&.    4s   	A 
A'&A'3G< 	)G02G< ?BG0E%G0G< %
E2/G01E22A#G0G< "G0$G< 0G44G< 7G48G< <
H	H	c                 R
   ddl }t          |           } |                    d          }|                    d          r
|dd         }	 t	          |          }n# t
          $ r d}Y nw xY w	 |                    d          5 }|dk    rL|                    | d	d
| i          }|j        dk    r$|	                                }|
                    dd          }d|v r|                    d          D ]s}	d|	v rm|	                                                                }
t          |
          dk    r4	 t          |
d                   c cddd           S # t          $ r Y ow xY wt|
                    di           }|                                D ]B\  }}d|v r9t#          |t          t$          f          rt          |          c cddd           S C|dk    r|
                    | d          }|j        dk    r]|	                                }|
                    dg           D ]1}t'          |
                    dd          |           s$t'          |
                    dd          |           r|
                    dg           D ]j}|
                    di           }|
                    d          }|r;t#          |t          t$          f          rt          |          c c cddd           S k|
                    d          p|
                    d          }|r9t#          |t          t$          f          rt          |          c cddd           S 3|
                    | d|            }|j        dk    r|	                                }|
                    d          p)|
                    d          p|
                    d          }|r7t#          |t          t$          f          rt          |          cddd           S |
                    | d           }|j        dk    r|	                                }|
                    d!g           }|D ]}t'          |
                    dd          |           rz|
                    d          p)|
                    d          p|
                    d          }|r9t#          |t          t$          f          rt          |          c cddd           S ddd           n# 1 swxY w Y   n# t
          $ r Y nw xY wdS )"z4Query a local server for the model's context length.r   Nrr   r   r   ru  r   r1   rv  r  rw  r   rx  rq   ry  rz  r{  r|  r}  ra   r   r   r   r   r  loaded_instancesconfigrc   z/v1/models/re   rm   z
/v1/modelsr   )r   rU   rs   r   r   r   r   r~  r   r   r   rL   rM   r   r   r   r   r   r  rs  )rE   ro   r   r   r  r   r  r   r  r  r   r}  r   r   r,  instcfgctxmodels_lists                      rT   _query_local_context_lengthr    s   LLL #5))E %%J5!! %_
.x88   H\\#\&& E	,&h&&{{j#;#;#;65/{RR#s**99;;D "XXlB77F F**$*LL$6$6 - -D(D00(,

(:(:(<(<#&u::??%-/259~~(=(='E	, E	, E	, E	, E	, E	, E	, E	,( ,6 %- %- %-(,%- "&,!;!;J&0&6&6&8&8 . .
U+s22z%#u7V7V2#&u::--5E	, E	, E	, E	, E	, E	, E	, E	,B k))zzZ"?"?"?@@#s**99;;D!XXh33 0 0,QUU5"-=-=uEE 
0IZ[\[`[`aegi[j[jlqIrIr 
0().@"(E(E 4 4&*hhx&<&<&)gg.>&?&?#& !4:cC<+H+H !4+.s88OOOOYE	, E	, E	, E	, E	, E	, E	, E	,\ #$%%(<"="="XGWAXAXC" 0z#U|'D'D 0'*3xxaE	, E	, E	, E	, E	, E	, E	, E	,f ::????@@D3&&yy{{hh//g488<L3M3MgQUQYQYZfQgQg $:cC<88 $s88sE	, E	, E	, E	, E	, E	, E	, E	,z ::77788D3&&yy{{"hhvr22$ , ,A(tR%@@ ,eeO44f>N8O8OfSTSXSXYeSfSf ,:cC<#@#@ ,#&s88OOKE	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	, E	,L     4s   	A A('A(,T B2T5E	TT 
E%"T$E%%A#TT DT)T 6ATT B&TT C"T1T >T?T TT TT 
T$#T$c                 .    |                      dd          S )zNormalize version separators for matching.

    Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
    OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
    Normalize both to dashes for comparison.
    r   -)r   )rE   s    rT   _normalize_model_versionr  `  s     ==c"""rV   c                 X   |r|                     d          rdS 	 |                    d          }|                    d          r
|dd         }| d}|dd}t          j        ||d	
          }|j        dk    rdS |                                }|                    dg           D ]O}|                    d          | k    r4|                    d          }	t          |	t                    r
|	dk    r|	c S Pn2# t          $ r%}
t                              d|
           Y d}
~
nd}
~
ww xY wdS )zQuery Anthropic's /v1/models endpoint for context length.

    Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
    OAuth tokens (sk-ant-oat*) from Claude Code return 401.
    z
sk-ant-oatNrr   r   r   z/v1/models?limit=1000z
2023-06-01)z	x-api-keyzanthropic-versionr   r(  r   r   r  rf   r   z%Anthropic /v1/models query failed: %s)rK   rs   r   r  r   r   r   r   r   r   r  r  )rE   ro   r%  r9  r   r)  r  r   r,  r  r#  s              rT   _query_anthropic_context_lengthr  j  sq     g((66 tAs##== 	9D,,, !-
 
 |C"===s""4yy{{&"%% 	 	AuuT{{e##ee.//c3'' C!GGJJJ		
  A A A<a@@@@@@@@A4s%   A C8 =A8C8 6C8 8
D'D""D'c                    t                      }| |v r||                              d          S t          |                                           }|                                D ]\  }}d|v r|                    dd          d         n|}|                                |                                 k    s%t          |                                          |k    r|                    d          c S |                                 }|                                D ]\  }}d|v r|                    dd          d         n|}|                                |ft          |                                          |ffD ]j\  }}|                    |          rPt          |          t          |          k    s|t          |                   dv r|                    d          c c S kdS )u  Resolve Nous Portal model context length via OpenRouter metadata.

    Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
    prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
    with version normalization (dot↔dash).
    ra   rr   rJ   z-:.N)r$  r   r  rN   r   rL   rK   r   )	rE   metadatar|   or_idr  baremodel_lowerr7  querys	            rT   _resolve_nous_context_lengthr    s    $%%H""#3444)%006688J (( / /u),u{{3""1%%5::<<5;;==((,DT,J,J,P,P,R,RV`,`,`99-..... -a
 ++--K (( 3 3u),u{{3""1%%5"&**,,!<?WX\?]?]?c?c?e?egq>r s 	3 	3Iu##E** 3I#e**,,	#e**0E0N0Nyy!12222222		3 4rV   config_context_lengthr   c                    |t          |t                    r|dk    r|S t          |           } |rt          | |          }||S t	          |          rBt          |          s2t          ||          }|                    |           }|sht          |          dk    r/t          t          |                                                    }n&|                                D ]\  }}	| |v s|| v r|	} n|r,|                    d          }
t          |
t                    r|
S t          |          set          |          r+t          | |          }|r|dk    rt          | ||           |S t                               d| |t$          d           t$          S |dk    s|rd	|v rt'          | |pd
|          }|r|S |dk    s|r&d|v r"	 ddlm}  ||           S # t,          $ r Y nw xY w|}|r|dv r|rt/          |          }|r|}|dk    rt1          |           }|r|S |rddlm}  |||           }|r|S t7                      }| |v r||                              dd          S |                                 }t;          t<                                          d d          D ]\  }}||v r|c S |r:t          |          r+t          | |          }|r|dk    rt          | ||           |S t$          S )ac  Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
    5. OpenRouter live API metadata
    6. Nous suffix-match via OpenRouter cache
    7. models.dev registry lookup (provider-aware)
    8. Thin hardcoded defaults (broad family patterns)
    9. Default fallback (128K)
    Nr   )r%  rJ   ra   u   Could not detect context length for model %r at %s — defaulting to %s tokens (probe-down). Set model.context_length in config.yaml to override.r   r<   r   zhttps://api.anthropic.combedrockzbedrock-runtime)get_bedrock_context_length)r>   r-   r&   )lookup_models_dev_contextr[   c                 ,    t          | d                   S )Nr   r   )xs    rT   <lambda>z*get_model_context_length.<locals>.<lambda>&  s    s1Q4yy rV   T)r   reverse)r   r   rU   rY  r}   r   r>  r   r   nextiterr   r   r   r  rW  r  rV  DEFAULT_FALLBACK_CONTEXTr  agent.bedrock_adapterr  ImportErrorr   r  agent.models_devr  r$  rN   sortedDEFAULT_CONTEXT_LENGTHS)rE   ro   r%  r  r   r2  endpoint_metadatamatchedr   r  ra   	local_ctxr  r  effective_providerinferredr  r  r  default_modelrL  s                        rT   get_model_context_lengthr    s   , (Z8Ms-S-S(XmpqXqXq$$
 #5))E  *5(;;M 8$$ ,-H-R-R ,9(GTTT#''.. 		$%%**t$5$<$<$>$>??@@ #4"9"9";";  JC||se||"' (4  	&$[[)9::N.#.. &%%*844 	, ** %7xHH	 %Q'xCCC$$KK. x$<!@!@	   ,+ ; (H44-eX5\A\^eff 	J
 9.?8.K.K	HHHHHH--e444 	 	 	D	 " .!37O!O!O 	./99H .%-"V##*511 	J >>>>>>''(:EBB 	J $%%H""#3V<<< ++--K!'%%''-@-@$" " "  v K''MMM (  %h// /x@@	 	Qx;;; $#s   G 
G%$G%r   c                 4    | sdS t          |           dz   dz  S )a  Rough token estimate (~4 chars/token) for pre-flight checks.

    Uses ceiling division so short texts (1-3 chars) never estimate as
    0 tokens, which would cause the compressor and pre-flight checks to
    systematically undercount when many short tool results are present.
    r      r   r  )r   s    rT   estimate_tokens_roughr  6  s&      qIIMarV   messagesc                 D    t          d | D                       }|dz   dz  S )z:Rough token estimate for a message list (pre-flight only).c              3   N   K   | ] }t          t          |                    V  !d S rz   r   r   r   msgs     rT   r   z1estimate_messages_tokens_rough.<locals>.<genexpr>D  s.      88c#c((mm888888rV   r  r   )sum)r  total_charss     rT   estimate_messages_tokens_roughr  B  s-    88x88888K!O!!rV   )system_prompttoolsr  r  c                    d}|r|t          |          z  }| r|t          d | D                       z  }|r|t          t          |                    z  }|dz   dz  S )u<  Rough token estimate for a full chat-completions request.

    Includes the major payload buckets Hermes sends to providers:
    system prompt, conversation messages, and tool schemas.  With 50+
    tools enabled, schemas alone can add 20-30K tokens — a significant
    blind spot when only counting messages.
    r   c              3   N   K   | ] }t          t          |                    V  !d S rz   r  r  s     rT   r   z0estimate_request_tokens_rough.<locals>.<genexpr>Y  s.      ==S3s3xx========rV   r  r   )r   r  r   )r  r  r  r  s       rT   estimate_request_tokens_roughr  H  sy     K *s=))) >s==H====== 's3u::&!O!!rV   )r   r   )F)rq   F)rq   rq   Nrq   )P__doc__r  rb  r  pathlibr   typingr   r   r   r   urllib.parser   r  rH  rB  r	   	getLogger__name__r  	frozensetrD   r   __annotations__compile
IGNORECASErO   rU   rW   rX   r  r  rY   rZ   r/  r\  r  MINIMUM_CONTEXT_LENGTHr  r   r   r   r   ru   r{   rx   r}   r   r   r   r   r   r   r   r   tupler   r   r   r  r  r$  r>  rC  rK  rW  rY  r^  ri  rn  rs  r  r  r  r  r  r  r  r  r  r   rV   rT   <module>r     sS      				        , , , , , , , , , , , , ! ! ! ! ! !   2 2 2 2 2 2		8	$	$
 &/Y 0 0 0 & & IcN   * !bjLM  # #    & 46 tCc3h/0 5 5 5$% E % % % GI S$sDcN/B*C%C D I I I8: #T#u*%5 : : : 
    /q1 
   M
 wM wM wM M wM M fM FM  F!M" w#M$ 6%M& F'M( V)M* w+M, V-M0 g1M4 65M M6 67M8 v9M: T;M> ?MB VCMH IMJ 6KML FMMR vSMV 
6WMd feMf WgMh TiMj 7kMl mMn foMp fqM M Mr fsMt FuMx FyM| v}M@ AMD fEMF FGMH  IMJ FKML "6MMN fOMP QMR 7SMT FUMV VWMX VYM M ^   < 0# 0# 0 0 0 0Dc Dd D D D DH# H$ H H H H
$h$8$ $ 	$
 }$ '$ M$ G$ 9$ i$ "9$ l$ \$ ($ %f$  
!$" Y#$$ "# " 1$ $ $ $sCx.   8s x}    $:# :$ : : : :$ $ $ $ $ $N8s 8x} 8 8 8 8v0c 0 0 0 0 #  S ZbcfZg    	S#X 	eCHo 	(SV- 	 	 	 	=T#s(^ = = = = ==DcN =x} = = = =d38n c3h    .,d3S#X#67 ,3 ,tTWY\T\~ ,bf , , , ,!+ !+ !+c4S>>Q9R !+ !+ !+ !+L X XXX X 
#tCH~
	X X X Xv; ; ; ; ;T#s(^    Cs Cc C3 C4 C C C C*S C HSM         c hsm    8( ( ( ( ( (VC s t    &2 2s 2x} 2 2 2 2j\s \c \hsm \ \ \ \~#C #C # # # #3 #  PXY\P]    >     F (,I$ I$I$I$ I$ :	I$
 I$ 	I$ I$ I$ I$X	  	  	  	  	  	 "T$sCx.-A "c " " " " ,0	" " "4S>"" " Dc3h()	"
 	" " " " " "rV   