Pendrokar committed on
Commit
e08a9be
·
verified ·
1 Parent(s): 57b5f27

MCP attempt

Browse files
Files changed (1) hide show
  1. gr_client.py +19 -0
gr_client.py CHANGED
@@ -540,6 +540,25 @@ class BlocksDemo:
540
  surprise,
541
  deepmoji_checked
542
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
  wav_path, arpabet_html, angry, happy, sad, surprise, response = client.predict(
544
  input_text, # str in 'Input Text' Textbox component
545
  voice, # Literal['ccby_nvidia_hifi_6670_M', 'ccby_nv_hifi_11614_F', 'ccby_nvidia_hifi_11697_F', 'ccby_nvidia_hifi_12787_F', 'ccby_nvidia_hifi_6097_M', 'ccby_nvidia_hifi_6671_M', 'ccby_nvidia_hifi_8051_F', 'ccby_nvidia_hifi_9017_M', 'ccby_nvidia_hifi_9136_F', 'ccby_nvidia_hifi_92_F'] in 'Voice' Radio component
 
540
  surprise,
541
  deepmoji_checked
542
  ):
543
+ """
544
+ Convert the text to speech using xVASynth xVAPitch models.
545
+
546
+ Args:
547
+ input_text: string; from which to create the audio
548
+ voice: Literal['ccby_nvidia_hifi_6670_M', 'ccby_nv_hifi_11614_F', 'ccby_nvidia_hifi_11697_F', 'ccby_nvidia_hifi_12787_F', 'ccby_nvidia_hifi_6097_M', 'ccby_nvidia_hifi_6671_M', 'ccby_nvidia_hifi_8051_F', 'ccby_nvidia_hifi_9017_M', 'ccby_nvidia_hifi_9136_F', 'ccby_nvidia_hifi_92_F']; the only viable Voice model filenames
549
+ lang: Literal['en', 'de', 'es', 'it', 'fr', 'ru', 'tr', 'la', 'ro', 'da', 'vi', 'ha', 'nl', 'zh', 'ar', 'uk', 'hi', 'ko', 'pl', 'sw', 'fi', 'hu', 'pt', 'yo', 'sv', 'el', 'wo', 'jp']; the language of input_text
550
+ pacing: float (numeric value between 0.5 and 2.0); Duration
551
+ pitch: float (numeric value between 0 and 1.0); Pitch
552
+ energy: float (numeric value between 0.1 and 1.0); Energy
553
+ anger: float (numeric value between 0 and 1.0); 😠 Anger
554
+ happy: float (numeric value between 0 and 1.0); 😃 Happiness
555
+ sad: float (numeric value between 0 and 1.0); 😭 Sadness
556
+ surprise: float (numeric value between 0 and 1.0); 😮 Surprise
557
+ deepmoji_checked: bool; use DeepMoji to parse English text and fill the emotional values
558
+
559
+ Returns:
560
+ Tuple of (output_audio_path, arpabet_html, final_anger_ratio, final_happiness_ratio, final_sadness_ratio, final_surprise_ratio, response) where output_audio_path is the filepath of output audio
561
+ """
562
  wav_path, arpabet_html, angry, happy, sad, surprise, response = client.predict(
563
  input_text, # str in 'Input Text' Textbox component
564
  voice, # Literal['ccby_nvidia_hifi_6670_M', 'ccby_nv_hifi_11614_F', 'ccby_nvidia_hifi_11697_F', 'ccby_nvidia_hifi_12787_F', 'ccby_nvidia_hifi_6097_M', 'ccby_nvidia_hifi_6671_M', 'ccby_nvidia_hifi_8051_F', 'ccby_nvidia_hifi_9017_M', 'ccby_nvidia_hifi_9136_F', 'ccby_nvidia_hifi_92_F'] in 'Voice' Radio component