
    i                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlmZ ddlmZmZmZmZ ddlmZ ddlmZ  ej        e          ZddlmZ ddlmZm Z m!Z! dd	l"m#Z# d
 Z$d Z%d Z&d Z'd Z(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9d Z:d!Z;d"Z<d#Z=dZ>d$Z?d%Z@d&eAfd'ZB eB            ZCd(ZDd&eeAef         fd)ZEd*eeAef         d&eAfd+ZFd&eGfd,ZHd-eAd&eeA         fd.ZId/eAd0eAd*eeAef         d&eAfd1ZJd/eAd0eAd*eeAef         d&eAfd2ZKd/eAd0eAd*eeAef         d&eAfd3ZLd/eAd0eAd*eeAef         d&eAfd4ZMd/eAd0eAd*eeAef         d&eAfd5ZNd/eAd0eAd*eeAef         d&eAfd6ZOe>e?e@fd7ePd8eQd9eQd:eQd&ePf
d;ZRd/eAd0eAd*eeAef         d&eAfd<ZSd&eGfd=ZTd&eAfd>ZUd&eAfd?ZVd/eAd0eAd*eeAef         d&eAfd@ZW	 dd/eAd0eeA         d&eAfdAZXd&eGfdBZYd&eZeAeAf         fdCZ[d&eGfdDZ\ ej]        dE          Z^ ej]        dF          Z_ ej]        dG          Z` ej]        dH          Za ej]        dI          Zb ej]        dJ          Zc ej]        dK          Zd ej]        dLeje        M          Zf ej]        dNeje        M          Zg ej]        dO          Zh ej]        dP          Zid/eAd&eAfdQZj	 ddRejk        dSejl        dTejl        dUeeeAgdf                  fdVZmedWk    r endX            endY           dZ Zo end[            end\ eoe$d          rd]nd^             end_ eoe%d`          rd]nda             endb ejp        dc          rddnde             endf eoe&dg          rd]ndh             endb e!            rddndi             endj ejp        dk          rdlndm             endn eH            rdondp             endqeC             eE            Zq eFeq          Zr endrer            ddslsmtZtmuZu dtdudvdwdxdydwdz e             d{dyd|d/gd}d~Zv etjw        dtdevd eYd           dS )a  
Text-to-Speech Tool Module

Supports seven TTS providers:
- Edge TTS (default, free, no API key): Microsoft Edge neural voices
- ElevenLabs (premium): High-quality voices, needs ELEVENLABS_API_KEY
- OpenAI TTS: Good quality, needs OPENAI_API_KEY
- MiniMax TTS: High-quality with voice cloning, needs MINIMAX_API_KEY
- Mistral (Voxtral TTS): Multilingual, native Opus, needs MISTRAL_API_KEY
- Google Gemini TTS: Controllable, 30 prebuilt voices, needs GEMINI_API_KEY
- NeuTTS (local, free, no API key): On-device TTS via neutts_cli, needs neutts installed

Output formats:
- Opus (.ogg) for Telegram voice bubbles (requires ffmpeg for Edge TTS)
- MP3 (.mp3) for everything else (CLI, Discord, WhatsApp)

Configuration is loaded from ~/.hermes/config.yaml under the 'tts:' key.
The user chooses the provider and voice; the model just sends text.

Usage:
    from tools.tts_tool import text_to_speech_tool, check_tts_requirements

    result = text_to_speech_tool(text="Hello world")
    N)Path)CallableDictAnyOptional)urljoin)display_hermes_home)resolve_managed_tool_gateway)managed_nous_tools_enabledprefers_gatewayresolve_openai_audio_api_key)hermes_xai_user_agentc                      ddl } | S )z?Lazy import edge_tts. Returns the module or raises ImportError.r   Nedge_ttsr   s    6/home/agentuser/.hermes/hermes-agent/tools/tts_tool.py_import_edge_ttsr   8   s    OOOO    c                      ddl m}  | S )zGLazy import ElevenLabs client. Returns the class or raises ImportError.r   
ElevenLabs)elevenlabs.clientr   r   s    r   _import_elevenlabsr   =   s    ,,,,,,r   c                      ddl m}  | S )zCLazy import OpenAI client. Returns the class or raises ImportError.r   )OpenAI)openair   )OpenAIClients    r   _import_openai_clientr   B   s    ------r   c                      ddl m}  | S )zDLazy import Mistral client. Returns the class or raises ImportError.r   Mistral)mistralai.clientr!   r    s    r   _import_mistral_clientr#   G   s    ((((((Nr   c                      ddl } | S )zJLazy import sounddevice. Returns the module or raises ImportError/OSError.r   N)sounddevice)sds    r   _import_sounddevicer'   L   s    Ir   edgezen-US-AriaNeuralpNInz6obpgDQGcFmaJgBeleven_multilingual_v2eleven_flash_v2_5zgpt-4o-mini-ttsalloyzhttps://api.openai.com/v1zspeech-2.8-hdEnglish_Graceful_Ladyz https://api.minimax.io/v1/t2a_v2zvoxtral-mini-tts-2603z$c69964a6-ab8b-4f8a-9465-ec0925096ec8eveen]    zhttps://api.x.ai/v1zgemini-2.5-flash-preview-ttsKorez0https://generativelanguage.googleapis.com/v1beta      returnc                  @    ddl m}  t           | dd                    S )Nr   get_hermes_dirzcache/audioaudio_cache)hermes_constantsr8   strr7   s    r   _get_default_output_dirr<   o   s.    //////~~m];;<<<r   i  c                  
   	 ddl m}   |             }|                    di           S # t          $ r t                              d           i cY S t          $ r)}t                              d|d           i cY d}~S d}~ww xY w)	z
    Load TTS configuration from ~/.hermes/config.yaml.

    Returns a dict with provider settings. Falls back to defaults
    for any missing fields.
    r   )load_configttsz9hermes_cli.config not available, using default TTS configzFailed to load TTS config: %sTexc_infoN)hermes_cli.configr>   getImportErrorloggerdebug	Exceptionwarning)r>   configes      r   _load_tts_configrK   z   s    	111111zz%$$$   PQQQ			   6DIII						s!   %( &B	BA=7B=B
tts_configc                     |                      d          pt                                                                          S )z%Get the configured TTS provider name.provider)rC   DEFAULT_PROVIDERlowerstrip)rL   s    r   _get_providerrR      s2    NN:&&:*:AACCIIKKKr   c                  .    t          j        d          duS )z+Check if ffmpeg is available on the system.ffmpegN)shutilwhich r   r   _has_ffmpegrX      s    <!!--r   mp3_pathc                    t                      sdS |                     dd          d         dz   }	 t          j        dd| dd	d
ddddd|dgdd          }|j        dk    rEt
                              d|j        |j                            dd          dd                    dS t          j
                            |          r%t          j
                            |          dk    r|S n# t          j        $ r t
                              d           Y nXt          $ r t
                              d           Y n3t          $ r'}t
                              d|d           Y d}~nd}~ww xY wdS )z
    Convert an MP3 file to OGG Opus format for Telegram voice bubbles.

    Args:
        mp3_path: Path to the input MP3 file.

    Returns:
        Path to the .ogg file, or None if conversion fails.
    N.r3   r   .oggrT   -i-acodeclibopus-ac1-b:a64k-vbroff-yT   capture_outputtimeoutz0ffmpeg conversion failed with return code %d: %sutf-8ignoreerrors   z)ffmpeg OGG conversion timed out after 30szffmpeg not found in PATHz ffmpeg OGG conversion failed: %sr@   )rX   rsplit
subprocessrun
returncoderE   rH   stderrdecodeospathexistsgetsizeTimeoutExpiredFileNotFoundErrorrG   )rY   ogg_pathresultrJ   s       r   _convert_to_opusr~      s    == tsA&&q)F2HMtXy)CxG
 
 

 !!NNM +V]-A-A'RZ-A-[-[\`]`\`-ac c c47>>(## 	(A(AA(E(EO$ D D DBCCCCC 3 3 3122222 M M M91tLLLLLLLLM4s+   A2C* %AC* *)E*#E*:	E*E%%E*textoutput_pathc           	        K   t                      }|                    di           }|                    dt                    }t          |                    d|                    dd                              }d|i}|dk    rt	          |dz
  dz            }|dd|d<    |j        | fi |}	|	                    |           d	{V  |S )
z
    Generate audio using Edge TTS.

    Args:
        text: Text to convert.
        output_path: Where to save the MP3 file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r(   voicespeed      ?d   z+d%rateN)r   rC   DEFAULT_EDGE_VOICEfloatroundCommunicatesave)
r   r   rL   	_edge_ttsedge_configr   r   kwargspctcommunicates
             r   _generate_edge_ttsr      s       !""I..,,KOOG%788E+//':>>'3+G+GHHIIEuF||US[C'((v')'7777K


;
'
''''''''r   c                    t          j        dd          }|st          d          |                    di           }|                    dt                    }|                    dt
                    }|                    d          rd}nd	}t                      } ||
          }	|	j        	                    | |||          }
t          |d          5 }|
D ]}|                    |           	 ddd           n# 1 swxY w Y   |S )z
    Generate audio using ElevenLabs.

    Args:
        text: Text to convert.
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    ELEVENLABS_API_KEY z=ELEVENLABS_API_KEY not set. Get one at https://elevenlabs.io/
elevenlabsvoice_idmodel_idr\   opus_48000_64mp3_44100_128api_keyr   r   r   output_formatwbN)rv   getenv
ValueErrorrC   DEFAULT_ELEVENLABS_VOICE_IDDEFAULT_ELEVENLABS_MODEL_IDendswithr   text_to_speechconvertopenwrite)r   r   rL   r   	el_configr   r   r   r   clientaudio_generatorfchunks                r   _generate_elevenlabsr      sd    i,b11G ZXYYY|R00I}}Z)DEEH}}Z)DEEH F## (''#%%JZ(((F+33#	 4  O 
k4	 	  A$ 	 	EGGENNNN	               s   C==DDc                    t                      \  }}|                    di           }|                    dt                    }|                    dt                    }|                    d|          }t	          |                    d|                    dd                              }|                    d          rd}	nd	}	t                      }
 |
||
          }	 t          ||| |	dt          t          j
                              i          }|dk    r!t          dt          d|                    |d<    |j        j        j        di |}|                    |           |t#          |dd          }t%          |          r |             S S # t#          |dd          }t%          |          r |             w w xY w)z
    Generate audio using OpenAI TTS.

    Args:
        text: Text to convert.
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r   modelr   base_urlr   r   r\   opusmp3)r   r   zx-idempotency-key)r   r   inputresponse_formatextra_headersg      ?g      @closeNrW   )#_resolve_openai_audio_client_configrC   DEFAULT_OPENAI_MODELDEFAULT_OPENAI_VOICEr   r   r   dictr;   uuiduuid4maxminaudiospeechcreatestream_to_filegetattrcallable)r   r   rL   r   r   
oai_configr   r   r   r   r   r   create_kwargsresponser   s                  r   _generate_openai_ttsr     s    <==GX"--JNN7$899ENN7$899E~~j(33H*..*..#*F*FGGHHE F##   (**L\'H===F+.DJLL0A0AB
 
 
 C<<%(s3%?%?M'"-6<&->>>>,,,..E?? 	EGGGG	 ..E?? 	EGGGG	s   !BF -Gc                    ddl }t          j        dd                                          }|st	          d          |                    di           }t          |                    dt                                                              pt          }t          |                    dt                                                              pt          }t          |                    d	t                              }t          |                    d
t                              }	t          |                    d          pt          j        d          pt                                                                        d          }
|                    d          rdnd}| ||d}|dk    s|t          k    s|dk    r(|	t          k    rd|i}|r||d	<   |dk    r|	r|	|d
<   ||d<   |                    |
 dd| dt!                      d|d          }|                                 t%          |d          5 }|                    |j                   ddd           n# 1 swxY w Y   |S )z
    Generate audio using xAI TTS.

    xAI exposes a dedicated /v1/tts endpoint instead of the OpenAI audio.speech
    API shape, so this is implemented as a separate backend.
    r   NXAI_API_KEYr   z5XAI_API_KEY not set. Get one at https://console.x.ai/xair   languagesample_ratebit_rater   XAI_BASE_URL/.wavwavr   )r   r   r   codecr   z/ttsBearer application/json)AuthorizationContent-Typez
User-Agent<   )headersjsonrj   r   )requestsrv   r   rQ   r   rC   r;   DEFAULT_XAI_VOICE_IDDEFAULT_XAI_LANGUAGEintDEFAULT_XAI_SAMPLE_RATEDEFAULT_XAI_BIT_RATEDEFAULT_XAI_BASE_URLrstripr   postr   raise_for_statusr   r   content)r   r   rL   r   r   
xai_configr   r   r   r   r   r   payloadr   r   r   s                   r   _generate_xai_ttsr   ?  s    OOOir**0022G RPQQQr**J:>>*.BCCDDJJLLdPdH:>>*.BCCDDJJLLdPdHjnn]4KLLMMK:>>*.BCCDDHz"" 	 9^$$	   eggffSkk	  !))&11<EEuE G 	111UNNx+???)0%(8 	7+6M-(E>>h>(0M*%#0 }}0w00./11
 

   	 	H 	k4	 	  "A	 !!!" " " " " " " " " " " " " " " s   8II#&I#c           	         ddl }t          j        dd          }|st          d          |                    di           }|                    dt
                    }|                    dt                    }|                    d	|                    d	d
                    }|                    dd
          }	|                    dd          }
|                    dt                    }|                    d          rd}n|                    d          rd}nd}|| d|||	|
ddd|d
dd}dd| d}|	                    |||d          }|
                                 |                                }|                    di           }|                    dd           }|dk    r+|                    d!d"          }t          d#| d$|           |                    d%i                               d&d          }|st          d'          t                              |          }t          |d(          5 }|                    |           ddd           n# 1 swxY w Y   |S ))a  
    Generate audio using MiniMax TTS API.

    MiniMax returns hex-encoded audio data. Supports streaming (SSE) and
    non-streaming modes. This implementation uses non-streaming for simplicity.

    Args:
        text: Text to convert (max 10,000 characters).
        output_path: Where to save the audio file.
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r   NMINIMAX_API_KEYr   z@MINIMAX_API_KEY not set. Get one at https://platform.minimax.io/minimaxr   r   r   r3   volpitchr   r   r   .flacflacr   F)r   r   r   r   i }  r1   )r   bitrateformatchannel)r   r   streamvoice_settingaudio_settingr   r   )r   r   r   )r   r   rj   	base_respstatus_code
status_msgunknown errorzMiniMax TTS API error (code ): datar   z%MiniMax TTS returned empty audio datar   )r   rv   r   r   rC   DEFAULT_MINIMAX_MODELDEFAULT_MINIMAX_VOICE_IDDEFAULT_MINIMAX_BASE_URLr   r   r   r   RuntimeErrorbytesfromhexr   r   )r   r   rL   r   r   	mm_configr   r   r   r   r   r   audio_formatr   r   r   r}   r   r   r   	hex_audioaudio_bytesr   s                          r   _generate_minimax_ttsr    s    OOOi)2..G ][\\\y"--IMM'#899E}}Z)ABBHMM':>>'1#=#=>>E
--q
!
!CMM'1%%E}}Z)ABBH F## 			g	&	&   	
 
 !"	
 
 G& +,7,, G
 }}XGWb}QQH]]__F

;++I--r22Ka]]<AA
V+VV*VVWWW

62&&**7B77I DBCCC --	**K	k4	 	  A	               s   7II Ic                    t          j        dd          }|st          d          |                    di           }|                    dt                    }|                    d          pt
          }|                    d          rd}n2|                    d	          rd
}n|                    d          rd}nd}t                      }	  ||          5 }	|	j        j	        
                    || ||          }
t          j        |
j                  }ddd           n# 1 swxY w Y   n^# t          $ r  t          $ rG}t                              d|d           t#          dt%          |          j                   |d}~ww xY wt)          |d          5 }|                    |           ddd           n# 1 swxY w Y   |S )zGenerate audio using Mistral Voxtral TTS API.

    The API returns base64-encoded audio; this function decodes it
    and writes the raw bytes to *output_path*.
    Supports native Opus output for Telegram voice bubbles.
    MISTRAL_API_KEYr   z?MISTRAL_API_KEY not set. Get one at https://console.mistral.ai/mistralr   r   r\   r   r   r   r   r   r   r   )r   r   r   r   NzMistral TTS failed: %sTr@   zMistral TTS failed: r   )rv   r   r   rC   DEFAULT_MISTRAL_TTS_MODELDEFAULT_MISTRAL_TTS_VOICE_IDr   r#   r   r   completebase64	b64decode
audio_datarG   rE   errorr  type__name__r   r   )r   r   rL   r   	mi_configr   r   r   r!   r   r   r  rJ   r   s                 r   _generate_mistral_ttsr    s    i)2..G \Z[[[y"--IMM'#<==E}}Z((H,HHF##   			f	%	%  			g	&	&   $&&GMWW%%% 	@|*33! /	 4  H !*8+>??K	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@     M M M-q4@@@D$q''2BDDEE1LM 
k4	 	  A	               sO   D. =D"D. "D&&D. )D&*D. .F	AFF	F>>GG	pcm_bytesr   channelssample_widthc                 @   ddl }||z  |z  }||z  }t          |           }|                    dddd|||||dz  	  	        }|                    dd	|          }	d
t          |          z   t          |	          z   |z   }
|                    dd|
d          }||z   |	z   | z   S )a  Wrap raw signed-little-endian PCM with a standard WAV RIFF header.

    Gemini TTS returns audio/L16;codec=pcm;rate=24000 -- raw PCM samples with
    no container. We add a minimal WAV header so the file is playable and
    ffmpeg can re-encode it to MP3/Opus downstream.
    r   Nz
<4sIHHIIHHs   fmt    r3      z<4sIs   data   z<4sI4ss   RIFFs   WAVE)structlenpack)r  r   r  r  r  	byte_rateblock_align	data_size	fmt_chunkdata_chunk_header	riff_sizeriff_headers               r   _wrap_pcm_as_wavr(    s     MMMh&5I\)KII
	q
 
I FGY??C	NN"S):%;%;;iGI++hGDDK"%66BBr   c                  
   ddl }t          j        d          pt          j        d          pd                                }|st	          d          |                    di           }t          |                    dt                                                              pt          }t          |                    d	t                                                              pt          }t          |                    d
          pt          j        d          pt                                                    
                    d          }dd| igigdgddd|iiidd}	| d| d}
|                    |
d|iddi|	d          }|j        dk    r	 |                                                    di           }|                    d          p|j        dd         }n# t          $ r |j        dd         }Y nw xY wt!          d |j         d!|           	 |                                }|d"         d         d#         d         }t#          d$ |D             d          }|t!          d%          |                    d&          p|                    d'          pi }|                    d(d          }n2# t$          t&          t(          f$ r}t!          d)|           |d}~ww xY w|st!          d*          t+          j        |          }t/          |          }|                                                    d+          r?t5          |d,          5 }|                    |           ddd           n# 1 swxY w Y   |S t9          j        d+d-.          5 }|                    |           |j        }ddd           n# 1 swxY w Y   	 t?          j         d/          }|r|                                                    d0          r|d1|d2d3d4d5d6d7d8d9d:d;d|g}n	|d1|d:d;d|g}tC          j"        |d<d=>          }|j#        dk    r6|j$        %                    d?d@A          dd         }t!          dB|           n0tL          '                    dC|           t?          j(        ||           	 t          j)        |           n:# tT          $ r Y n.w xY w# 	 t          j)        |           w # tT          $ r Y w w xY wxY w|S )Da  Generate audio using Google Gemini TTS.

    Gemini's generateContent endpoint with responseModalities=["AUDIO"] returns
    raw 24kHz mono 16-bit PCM (L16) as base64. We wrap it with a WAV RIFF
    header to produce a playable file, then ffmpeg-convert to MP3 / Opus if
    the caller requested those formats (same pattern as NeuTTS).

    Args:
        text: Text to convert (prompt-style; supports inline direction like
              "Say cheerfully:" and audio tags like [whispers]).
        output_path: Where to save the audio file (.wav, .mp3, or .ogg).
        tts_config: TTS config dict.

    Returns:
        Path to the saved audio file.
    r   NGEMINI_API_KEYGOOGLE_API_KEYr   zIGEMINI_API_KEY not set. Get one at https://aistudio.google.com/app/apikeygeminir   r   r   GEMINI_BASE_URLr   partsr   AUDIOvoiceConfigprebuiltVoiceConfig	voiceName)responseModalitiesspeechConfig)contentsgenerationConfigz/models/z:generateContentkeyr   r   r   )paramsr   r   rj   ro   r  messagei,  zGemini TTS API error (HTTP r   
candidatesr   c              3   *   K   | ]}d |v sd|v 
|V  dS )
inlineDatainline_dataNrW   ).0ps     r   	<genexpr>z'_generate_gemini_tts.<locals>.<genexpr>n  s7      WW|q/@/@MUVDVDV1DVDVDVDVWWr   z+Gemini TTS response contained no audio datar<  r=  r   z#Gemini TTS response was malformed: z$Gemini TTS returned empty audio datar   r   FsuffixdeleterT   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   	-loglevelTrg   rh   rk   rl   rm   zffmpeg conversion failed: zEffmpeg not found; writing raw WAV to %s (extension may be misleading))+r   rv   r   rQ   r   rC   r;   DEFAULT_GEMINI_TTS_MODELDEFAULT_GEMINI_TTS_VOICEDEFAULT_GEMINI_TTS_BASE_URLr   r   r   r   r   rG   r  nextKeyError
IndexError	TypeErrorr  r  r(  rP   r   r   r   tempfileNamedTemporaryFilenamerU   rV   rq   rr   rs   rt   ru   rE   rH   copyfileremoveOSError)r   r   rL   r   r   gemini_configr   r   r   r   endpointr   errdetailr   r.  
audio_partinline	audio_b64rJ   r  	wav_bytesr   tmpwav_pathrT   cmdr}   rt   s                                r   _generate_gemini_ttsr]  *  s+   " OOOy)**Obi8H.I.IORVVXXG 
W
 
 	
 NN8R00M!!'+CDDEEKKMMiQiE!!'+CDDEEKKMMiQiE*%% 	'9&''	'&  eggffSkk	  /01#*))K+? 
 

 
G ;;E;;;H}}w!34   H s""	)--//%%gr22CWWY''>8=#+>FF 	) 	) 	)]4C4(FFF	)K(*>KK6KK
 
 	
	M}}\"1%i09WWeWWWY]^^
LMMM--T1N1NTRTJJvr**		j), M M MDDDEE1LM  CABBB ++I ++I ##F++ +t$$ 	GGI	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
	$F5	A	A	A S		)8              h'' 	3   ""++F33 	XD(y%E65+w tXt[';W^CbIIIF A%%--gh-GGM"#H#H#HIII & NNW   OHk222	Ih 	 	 	D		Ih 	 	 	D	 s   AG   G<;G<BJ6 6K%K  K%M55M9<M9OOOC!S /S 
SSS;S+*S;+
S85S;7S88S;c                  f    	 ddl } | j                            d          duS # t          $ r Y dS w xY w)z=Check if the neutts engine is importable (installed locally).r   NneuttsF)importlib.utilutil	find_specrG   )	importlibs    r   _check_neutts_availablerd    sP    ~''11==   uus   " 
00c                  Z    t          t          t                    j        dz  dz            S )z9Return path to the bundled default voice reference audio.neutts_sampleszjo.wavr;   r   __file__parentrW   r   r   _default_neutts_ref_audiorj    $    tH~~$'77(BCCCr   c                  Z    t          t          t                    j        dz  dz            S )z>Return path to the bundled default voice reference transcript.rf  zjo.txtrg  rW   r   r   _default_neutts_ref_textrm    rk  r   c                    ddl }|                    di           }|                    dd          pt                      }|                    dd          pt                      }|                    dd          }|                    d	d
          }|}	|                    d          s|                    dd          d         dz   }	t          t          t                    j	        dz            }
|j
        |
d| d|	d|d|d|d|g}t          j        |ddd          }|j        dk    rk|j                                        }d |                                D             }t#          dt%          d                              |          pd           |	|k    r`t)          j        d          }|r5|d|	ddd |g}t          j        |dd!"           t-          j        |	           nt-          j        |	|           |S )#a  Generate speech using the local NeuTTS engine.

    Runs synthesis in a subprocess via tools/neutts_synth.py to keep the
    ~500MB model in a separate process that exits after synthesis.
    Outputs WAV; the caller handles conversion for Telegram if needed.
    r   Nr_  	ref_audior   ref_textr   zneuphonic/neutts-air-q4-ggufdevicecpur   r[   r3   zneutts_synth.pyz--textz--outz--ref-audioz
--ref-textz--modelz--deviceTx   )ri   r   rj   c                 <    g | ]}|                     d           |S )zOK:)
startswith)r>  ls     r   
<listcomp>z$_generate_neutts.<locals>.<listcomp>  s)    QQQQQ\\%=P=PQqQQQr   zNeuTTS synthesis failed: 
   r   rT   r]   rf   rD  r  rg   )checkrj   )sysrC   rj  rm  r   rp   r;   r   rh  ri  
executablerq   rr   rs   rt   rQ   
splitlinesr  chrjoinrU   rV   rv   rP  rename)r   r   rL   rz  neutts_configro  rp  r   rq  r[  synth_scriptr\  r}   rt   error_linesrT   conv_cmds                    r   _generate_neuttsr    s    JJJNN8R00M!!+r22Q6O6Q6QI  R00N4L4N4NHg'EFFEx//F H'' :%%c1--a069tH~~,/@@AAL$yh5FC ^C4MMMFA$$&&QQ&"3"3"5"5QQQes2ww||K7P7P7cTceefff ;h'' 	-hk7KXHN84<<<<Ih Ih,,,r   c                      r                                  st          dd          S t                     t          k    r=t                              dt                     t                      dt                    t                      t                    }ddlm	}  |dd	          
                                }|d
k    }|r"t          |                                          }nut          j                                                            d          }t          t                     }|                    dd           |r|dv r
|d| dz  }n	|d| dz  }|j                            dd           t'          |          	 |dk    rf	 t)                       n)# t*          $ r t-          j        dddd          cY S w xY wt                              d           t3                      n|dk    rf	 t5                       n)# t*          $ r t-          j        dddd          cY S w xY wt                              d           t7                      n|dk    r-t                              d           t9                      n[|dk    r-t                              d           t;                      n(|dk    rf	 t=                       n)# t*          $ r t-          j        dddd          cY S w xY wt                              d            t?                      n|d!k    r-t                              d"           tA                      n|d#k    rTtC                      st-          j        dd$dd          S t                              d%           tE                      n/d}		 tG                       n# t*          $ r d}	Y nw xY w|	rt                              d&           	 ddl$}
|
j%        &                    d'(          5 }|'                     fd)          (                    d*+           ddd           n# 1 swxY w Y   n# tR          $ r& tU          j+        tY                                Y nYw xY wtC                      r.t                              d,           d#}tE                      nt-          j        dd-dd          S tZ          j.        /                              r#tZ          j.        0                              dk    rt-          j        dd.| d/dd          S d}|d0v r+1                    d          ste                    }|r|d}n|d1v r1                    d          }tZ          j.        0                              }t                              d2|d3|           d4 }|rd5| }t-          j        d|||d6d          S # tf          $ r>}d7| d8| }t          4                    d9|           t          |d          cY d}~S d}~wtj          $ r@}d:| d8| }t          4                    d9|d;           t          |d          cY d}~S d}~wtl          $ r@}d<| d8| }t          4                    d9|d;           t          |d          cY d}~S d}~ww xY w)=ac  
    Convert text to speech audio.

    Reads provider/voice config from ~/.hermes/config.yaml (tts: section).
    The model sends text; the user configures voice and provider.

    On messaging platforms, the returned MEDIA:<path> tag is intercepted
    by the send pipeline and delivered as a native voice message.
    In CLI mode, the file is saved to ~/voice-memos/.

    Args:
        text: The text to convert to speech.
        output_path: Optional custom save path. Defaults to ~/voice-memos/<timestamp>.mp3

    Returns:
        str: JSON result with success, file_path, and optionally MEDIA tag.
    zText is requiredF)successz.TTS text too long (%d chars), truncating to %dNr   )get_session_envHERMES_SESSION_PLATFORMr   telegramz%Y%m%d_%H%M%ST)parentsexist_ok)r   r   r  r,  tts_r\   z.mp3r   z`ElevenLabs provider selected but 'elevenlabs' package not installed. Run: pip install elevenlabs)r  r  )ensure_asciiz$Generating speech with ElevenLabs...r   z<OpenAI provider selected but 'openai' package not installed.z$Generating speech with OpenAI TTS...r   z%Generating speech with MiniMax TTS...r   z!Generating speech with xAI TTS...r  ziMistral provider selected but 'mistralai' package not installed. Run: pip install 'hermes-agent[mistral]'z-Generating speech with Mistral Voxtral TTS...r,  z+Generating speech with Google Gemini TTS...r_  zNeuTTS provider selected but neutts is not installed. Run hermes setup and choose NeuTTS, or install espeak-ng and run python -m pip install -U neutts[all].z(Generating speech with NeuTTS (local)...z"Generating speech with Edge TTS...r3   )max_workersc                  J    t          j        t                               S N)asynciorr   r   )file_strr   rL   s   r   <lambda>z%text_to_speech_tool.<locals>.<lambda>~  s    GK0B4S]0^0^$_$_ r   r   rj   z9Edge TTS not available, falling back to NeuTTS (local)...zhNo TTS provider available. Install edge-tts (pip install edge-tts) or set up NeuTTS for local synthesis.z-TTS generation produced no output (provider: ))r(   r_  r   r   )r   r   r  r,  z,TTS audio saved: %s (%s bytes, provider: %s),zMEDIA:z[[audio_as_voice]]
)r  	file_path	media_tagrN   voice_compatiblezTTS configuration error (r   z%szTTS dependency missing (r@   zTTS generation failed ()7rQ   
tool_errorr  MAX_TEXT_LENGTHrE   rH   rK   rR   gateway.session_contextr  rP   r   
expanduserdatetimenowstrftimeDEFAULT_OUTPUT_DIRmkdirri  r;   r   rD   r   dumpsinfor   r   r   r  r   r#   r  r]  rd  r  r   concurrent.futuresfuturesThreadPoolExecutorsubmitr}   r  r  rr   r   rv   rw   rx   ry   r   r~   r   r  r{   rG   )r   r   rN   r  platform	want_opusr  	timestampout_diredge_available
concurrentpoolr  	opus_path	file_sizer  rJ   	error_msgr  rL   s   `                 @@r   text_to_speech_toolr    s   *  =tzz|| =,e<<<< 4yy?""GTTcddd$_$%!##JZ((H 8777778"==CCEEHZ'I  9%%0022		%))++44_EE	)**dT222  	9%RRR"8"8"8"88II"8"8"8"88I 4$7779~~HG4|##'"$$$$ ' ' 'z$# # !&' ' ' ' ' ''
 KK>??? x<<<<!!'%'''' ' ' 'z$[# # !&' ' ' ' ' ''
 KK>??? x<<<<""KK?@@@!$*====KK;<<<dHj9999""'&(((( ' ' 'z$H# # !&	' ' ' ' ' '' KKGHHH!$*====!!KKEFFF x<<<<!!*,, 'z$F# # !&	' ' ' '
 KKBCCCT8Z8888 "N' """" ' ' '!&'  '@AAAP----#+>>1>MM -QU______  &&,,,- - - - - - - - - - - - - - - $ P P PK 24: N NOOOOOP(** 	'WXXX# x<<<<z$E# # !&	' ' ' ' w~~h'' 	#27??8+D+D+I+I: TTTT  "# # # # !;;;HDUDUV\D]D];(22I ($#' FFF'0088GOOH--	BHQZN^N^`hiii (X''	 	;:y::Iz!"  0
 
    	  4 4 4@@@Q@@	T9%%%)U333333333 4 4 4?x??A??	T9t444)U333333333 4 4 4>h>>1>>	T9t444)U333333333	4s1  X F" !X "#GX G6X ?H X #H41X 3H44BX K  X  #LX LBX /X O X O$!X #O$$X Q* #/QQ* Q""Q* %Q"&Q* )X *-RX RAX 2AX B/X 
[3Y>[[5Z[[5[[[c                  L   	 t                       dS # t          $ r Y nw xY w	 t                       t          j        d          rdS n# t          $ r Y nw xY w	 t                       t                      rdS n# t          $ r Y nw xY wt          j        d          rdS t          j        d          rdS t          j        d          st          j        d          rdS 	 t                       t          j        d          rdS n# t          $ r Y nw xY wt                      rdS dS )	z
    Check if at least one TTS provider is available.

    Edge TTS needs no API key and is the default, so if the package
    is installed, TTS is available.

    Returns:
        bool: True if at least one provider can work.
    Tr   r   r   r*  r+  r
  F)	r   rD   r   rv   r   r   _has_openai_audio_backendr#   rd  rW   r   r   check_tts_requirementsr    s   t   9)** 	4	   $&& 	4	   	y"## t	y t	y!"" bi0@&A&A t   9&'' 	4	      t5sA    
"A 
AAA8 8
BB"D 
DDc                      t                      } | rt          d          s	| t          fS t          d          }|$d}t	                      r|dz  }t          |          |j        t          |j        	                    d           dd          fS )zReturn direct OpenAI audio config or a managed gateway fallback.

    When ``tts.use_gateway`` is set in config, the Tool Gateway is preferred
    even if direct OpenAI credentials are present.
    r?   openai-audioNz8Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is setz5, and the managed OpenAI audio gateway is unavailabler   v1)
r   r   DEFAULT_OPENAI_BASE_URLr
   r   r   nous_user_tokenr   gateway_originr   )direct_api_keymanaged_gatewayr9  s      r   r   r     s     233N 7oe44 76662>BBOL%'' 	ONNG!!!*G)0055888$- -  r   c                  V    t          t                      pt          d                    S )zPReturn True when OpenAI audio can use direct credentials or the managed gateway.r  )boolr   r
   rW   r   r   r  r    s%    ,..^2N~2^2^___r   z(?<=[.!?])(?:\s|\n)|(?:\n\n)z```[\s\S]*?```z\[([^\]]+)\]\([^)]+\)zhttps?://\S+z\*\*(.+?)\*\*z	\*(.+?)\*z`(.+?)`z^#+\s*flagsz^\s*[-*]\s+z---+z\n{3,}c                 F   t                               d|           } t                              d|           } t                              d|           } t                              d|           } t
                              d|           } t                              d|           } t                              d|           } t                              d|           } t                              d|           } t                              d|           } |                                 S )z:Remove markdown formatting that shouldn't be spoken aloud. z\1r   z

)_MD_CODE_BLOCKsub_MD_LINK_MD_URL_MD_BOLD
_MD_ITALIC_MD_INLINE_CODE
_MD_HEADER_MD_LIST_ITEM_MD_HR_MD_EXCESS_NLrQ   )r   s    r   _strip_markdown_for_ttsr    s    c4((D<<t$$D;;r4  D<<t$$D>>%&&Dud++D>>"d##DR&&D::b$DVT**D::<<r   
text_queue
stop_eventtts_done_eventdisplay_callbackc           	         |                                  	 ddt          t          t                      }|                    di           }|                    d          |                    d|                    d                    t          j        dd          }|st                              d           n	 t                      } ||	          n*# t          $ r t                              d
           Y nw xY w	 t                      }|                    ddd                                           nj# t          t          f$ r'}	t                              d|	           dY d}	~	n7d}	~	wt           $ r'}	t                              d|	           dY d}	~	nd}	~	ww xY wd}
d}d}d}g t#          j        dt"          j                  }dt(          ffd}d                                 s;	 |                     |          }n5# t,          j        $ r# t1          |
          |k    r ||
           d}
Y ^w xY w|6|                    d|
          }
|
                                r ||
           n|
|z  }
|                    d|
          }
d|
v rd|
vr	 t6                              |
          }|n_|                                }|
d|         }|
|d         }
t1          |                                          |k     r||
z   }
n ||           |                                ;	 	 |                                  n# t,          j        $ r Y nw xY w,n2# t           $ r%}	t                              d|	           Y d}	~	nd}	~	ww xY w:	                                                                    n# t           $ r Y nw xY w|!                                 dS # :	                                                                    n# t           $ r Y nw xY w|!                                 w xY w)a  Consume text deltas from *text_queue*, buffer them into sentences,
    and stream each sentence through ElevenLabs TTS to the speaker in
    real-time.

    Protocol:
        * The producer puts ``str`` deltas onto *text_queue*.
        * A ``None`` sentinel signals end-of-text (flush remaining buffer).
        * *stop_event* can be set to abort early (e.g. user interrupt).
        * *tts_done_event* is **set** in the ``finally`` block so callers
          waiting on it (continuous voice mode) know playback is finished.
    Nr   r   streaming_model_idr   r   r   z8ELEVENLABS_API_KEY not set; streaming TTS audio disabledr   z8elevenlabs package not installed; streaming TTS disabledr0   r3   int16)
samplerater  dtypezsounddevice not available: %sz#sounddevice OutputStream failed: %s   r   g      ?z<think[\s>].*?</think>r  sentencec                 \                                    rdS t          |                                           }|sdS |                                                    d          }
D ]0}|                                                    d          |k    r dS 1
                    |            |            dS t          |          t          k    r|dt                   }	 j        	                    |d          }h|D ]a}                                 r nLddl
}|                    ||j                  }                    |                    dd                     bdS dS  	|           dS # t          $ r&}t                               d	|           Y d}~dS d}~ww xY w)
z6Display sentence and optionally generate + play audio.Nz.!,	pcm_24000r   r   )r  r   r3   z!Streaming TTS sentence failed: %s)is_setr  rQ   rP   r   appendr  r  r   r   numpy
frombufferr  r   reshaperG   rE   rH   )r  cleanedcleaned_lowerprev
audio_iterr   _npaudio_arrayexc_play_via_tempfile_spoken_sentencesr   r  r   output_streamr  r   s            r   _speak_sentencez.stream_tts_to_speaker.<locals>._speak_sentenceo  s     "" -h77==??G #MMOO22599M)  ::<<&&u-->>FF ?$$W---+  ***~7||o--!"2?"23I#2:: %%"-	 ;  
 !,!+ H H%,,.. "!E++++&)nnU#)n&L&L%++K,?,?A,F,FGGGGH H!E '&z:>>>>> I I IBCHHHHHHHHHIs   %BE; -E; ;
F+F&&F+c                    d}	 ddl }t          j        dd          }|j        }|                    |d          5 }|                    d           |                    d           |                    d	           | D ]-}|                                r n|	                    |           .ddd           n# 1 swxY w Y   dd
l
m}  ||           n2# t          $ r%}t                              d|           Y d}~nd}~ww xY w|r(	 t          j        |           dS # t"          $ r Y dS w xY wdS # |r&	 t          j        |           w # t"          $ r Y w w xY ww xY w)z0Write PCM chunks to a temp WAV file and play it.Nr   r   FrA  r   r3   r4   r0   )play_audio_filez!Temp-file TTS fallback failed: %s)waverL  rM  rN  r   setnchannelssetsampwidthsetframerater  writeframestools.voice_moder  rG   rE   rH   rv   unlinkrQ  )	r  stop_evttmp_pathr  rZ  wfr   r  r  s	            r   r  z1stream_tts_to_speaker.<locals>._play_via_tempfile  s   H1NNN8YYsD)) .ROOA&&&OOA&&&OOE***!+ . .#??,, "!Eu----. . . . . . . . . . . . . . . =<<<<<)))) I I IBCHHHHHHHHI  	(+++++"    8 	(++++"   s   7C A0B7+C 7B;;C >B;?C D2 
DC>9D2 >DD2 	D 
D-,D-2E6E
E
EEEEr  z<thinkz</think>Tz Streaming TTS pipeline error: %s)"clearr   %DEFAULT_ELEVENLABS_STREAMING_MODEL_IDrK   rC   rv   r   rE   rH   r   rD   r'   OutputStreamstartrQ  rF   rG   recompileDOTALLr;   r  queueEmptyr  r  rQ   _SENTENCE_BOUNDARY_REsearchend
get_nowaitstopr   set)r  r  r  r  rL   r   r   r   r&   r  sentence_bufmin_sentence_lenlong_flush_lenqueue_timeout_think_block_rer  deltamend_posr  r  r  r   r   r  r   s    ` `                @@@@@@r   stream_tts_to_speakerr  ,  s   " s.8%''
NN<44	==X66==!5!*z8!D!DF F )0"55 	)NNUVVVV[/11
#G444 [ [ [YZZZZZ[ !),..B$&OO#(1G %4 % %M "''))))#W- ) ) )LL!@#FFF$(MMMMMM  ) ) )NN#H#NNN$(MMMMMM) ')*%>biPPP(	Ic (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	I (	IT	 	 	4 ##%% *	*"}==;   |$$~55#OL111#%L }.222|DD%%'' 2#OL111E!L
 +..r<@@L <''Jl,J,J*)00>>9%%'''1+GHH5x~~''((+;;;#+l#:L)))*? ##%% *	*Z	%%'''';   	   @ @ @93????????@ $""$$$##%%%%    $""$$$##%%%%   s  B(M3 C" !M3 "$D	M3 D		M3 :E
 	M3 
F1E=8M3 =F1
F,'M3 ,F11AM3 H" !M3 "/IM3 IC2M3 M M3 M.+M3 -M..M3 2O7 3
N"=NO7 N""O7 ((O 
OO7Q	;(P$#Q	$
P1.Q	0P11Q	__main__u   🔊 Text-to-Speech Tool Modulez2==================================================c                 >    	  |              dS # t           $ r Y dS w xY w)NTF)rD   )importerlabels     r   _checkr    s9    	HJJJ4 	 	 	55	s   
 
z
Provider availability:z  Edge TTS:   	installedz$not installed (pip install edge-tts)z  ElevenLabs: elz&not installed (pip install elevenlabs)z    API Key:  r   r  znot setz  OpenAI:     oaiznot installedz2not set (VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY)z  MiniMax:    r   zAPI key setznot set (MINIMAX_API_KEY)z  ffmpeg:     u	   ✅ foundu(   ❌ not found (needed for Telegram Opus)z
  Output dir: z  Configured provider: )registryr  r   a  Convert text to speech audio. Returns a MEDIA: path that the platform delivers as a voice message. On Telegram it plays as a voice bubble, on Discord/WhatsApp as an audio attachment. In CLI mode, saves to ~/voice-memos/. Voice and provider are user-configured, not model-selected.objectstringz:The text to convert to speech. Keep under 4000 characters.)r  descriptionz9Optional custom file path to save the audio. Defaults to z/audio_cache/<timestamp>.mp3r   r   )r  
propertiesrequired)rN  r  
parametersr?   c                 r    t          |                     dd          |                     d                    S )Nr   r   r   r  )r  rC   )argskws     r   r  r  1  s6    2XXfb!!HH]++ -  -  - r   u   🔊)rN  toolsetschemahandlercheck_fnemojir  )x__doc__r  r  r  r   loggingrv   r  r  rU   rq   rL  	threadingr   pathlibr   typingr   r   r   r   urllib.parser   r:   r	   	getLoggerr  rE   tools.managed_tool_gatewayr
   tools.tool_backend_helpersr   r   r   tools.xai_httpr   r   r   r   r#   r'   rO   r   r   r   r  r   r   r  r   r   r   r  r  r   r   r   r   r   rE  rF  rG  GEMINI_TTS_SAMPLE_RATEGEMINI_TTS_CHANNELSGEMINI_TTS_SAMPLE_WIDTHr;   r<   r  r  rK   rR   r  rX   r~   r   r   r   r   r  r  r  r   r(  r]  rd  rj  rm  r  r  r  tupler   r  r  r  r  r  r  r  r  r  	MULTILINEr  r  r  r  r  QueueEventr  printr  r   rI   rN   tools.registryr  r  
TTS_SCHEMAregisterrW   r   r   <module>r?     s9
   2      				  				                  0 0 0 0 0 0 0 0 0 0 0 0             0 0 0 0 0 0		8	$	$ C C C C C C p p p p p p p p p p 0 0 0 0 0 0  
  
  
  
    ' 4 6 (; %(  5 ' 2 = 3 E     , 9 ! P    = = = = = -,.. $sCx.    &Ld38n L L L L L.T . . . .
 s  x}        L3 S d3PS8n Y\    <(s ( ($sCx. (UX ( ( ( (\-s - -$sCx. -UX - - - -f;C ;c ;tCH~ ;RU ; ; ; ;BQ Q# Q4S> QVY Q Q Q Qn+ +# +4S> +VY + + + +f .'/	C CCC C 	C
 C C C CD@s @ @$sCx. @UX @ @ @ @N    D3 D D D D
D# D D D D
23 2S 2d38n 2QT 2 2 2 2t "&A4 A4
A4#A4 	A4 A4 A4 A4N) ) ) ) )XU38_    ,`4 ` ` ` ` #
#BCC  -..2:.//
"*_
%
%2:&''RZ%%
"*Z((RZ	666

>>>>	G		
9%%# #    & 9=	F FFF OF xt45	F F F FX z	E
+,,,	E(OOO   
E
$%%%	E
x&&1A6*J*Jv;;Pv
x
xyyy	E
z&&1CT*J*Jx;;Px
z
z{{{	E
TIBI.B$C$CR55
T
TUUU	E
e&&1F*N*Nc;;Tc
e
efff	E	o0022l558l	o 	o   
E
kIBI6G,H,Hi==Ni
k
klll	E
i++--g;;=g
i
ijjj	E
1/
1
1222F}V$$H	E
.H
.
./// 0 / / / / / / /  n ![ 
 !  O[n[n[p[p   O   O   O 	
 	
 H  
&  	- - $
	 	 	 	 	 	r   