Kuberwastaken committed on
Commit
f599acb
·
1 Parent(s): 5dfc9ca

made share true

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -121,11 +121,12 @@ def parler_preprocess(text):
121
  return text
122
 
123
  def text_to_speech(text):
124
- # Asian mom nagging style description
125
  description = ("Elisabeth speaks in a mature, strict, nagging, and slightly disappointed tone, "
126
  "with a hint of love and high expectations, at a moderate pace with high quality audio. "
127
  "She sounds like a stereotypical Asian mother who compares you to your cousins, "
128
  "questions your life choices, and threatens you with a slipper, but ultimately wants the best for you.")
 
 
129
  inputs = parler_tokenizer(description, return_tensors="pt").to(parler_device)
130
  prompt = parler_tokenizer(parler_preprocess(text), return_tensors="pt").to(parler_device)
131
  set_seed(PARLER_SEED)
@@ -136,7 +137,10 @@ def text_to_speech(text):
136
  def process_frame(image, vision_components, llm_components):
137
  caption = analyze_image(image, vision_components)
138
  roast = generate_roast(caption, llm_components)
139
- audio = text_to_speech(roast)
 
 
 
140
  return caption, roast, audio
141
 
142
  def setup_processing_chain(video_feed, analysis_output, roast_output, audio_output):
@@ -185,4 +189,4 @@ if __name__ == "__main__":
185
  os.system('python -m unidic download')
186
  nltk.download('averaged_perceptron_tagger_eng')
187
  app = create_app()
188
- app.launch()
 
121
  return text
122
 
123
  def text_to_speech(text):
 
124
  description = ("Elisabeth speaks in a mature, strict, nagging, and slightly disappointed tone, "
125
  "with a hint of love and high expectations, at a moderate pace with high quality audio. "
126
  "She sounds like a stereotypical Asian mother who compares you to your cousins, "
127
  "questions your life choices, and threatens you with a slipper, but ultimately wants the best for you.")
128
+ if not text or not isinstance(text, str):
129
+ return (PARLER_SAMPLE_RATE, np.zeros(1))
130
  inputs = parler_tokenizer(description, return_tensors="pt").to(parler_device)
131
  prompt = parler_tokenizer(parler_preprocess(text), return_tensors="pt").to(parler_device)
132
  set_seed(PARLER_SEED)
 
137
  def process_frame(image, vision_components, llm_components):
138
  caption = analyze_image(image, vision_components)
139
  roast = generate_roast(caption, llm_components)
140
+ if not roast or not isinstance(roast, str):
141
+ audio = (PARLER_SAMPLE_RATE, np.zeros(1))
142
+ else:
143
+ audio = text_to_speech(roast)
144
  return caption, roast, audio
145
 
146
  def setup_processing_chain(video_feed, analysis_output, roast_output, audio_output):
 
189
  os.system('python -m unidic download')
190
  nltk.download('averaged_perceptron_tagger_eng')
191
  app = create_app()
192
+ app.launch(share=True)