Hasti11 committed · verified
Commit 838123a · Parent(s): 8c1c639

Update app.py

Files changed (1): app.py (+62 −27)
app.py CHANGED
@@ -9,7 +9,6 @@ import moviepy.editor as mp
 from gtts import gTTS
 from groq import Groq
 
-
 client = Groq(
     api_key="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",
 )
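Note: this commit leaves a Groq API key hard-coded in app.py, so anyone who can read the Space's files can use (and exhaust) it. A minimal sketch of the usual fix, assuming the key is stored in a GROQ_API_KEY environment variable (e.g. a Space secret) — not part of this commit:

    import os
    from groq import Groq

    # Read the key from the environment (e.g. a Hugging Face Space secret)
    # instead of committing it to app.py.
    client = Groq(api_key=os.environ["GROQ_API_KEY"])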
@@ -20,8 +19,9 @@ audio_pipeline = pipeline("audio-classification", model="audeering/wav2vec2-larg
 text_pipeline = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=2)
 
 conversation_history = []
+max_history_length = 3
 
-def process_input(video_stream):
+def process_input(video_stream, conversation_history):
     if isinstance(video_stream, str):
         video_file_path = video_stream
 
@@ -117,16 +117,28 @@ def process_input(video_stream):
 
     except Exception as e:
         print(f"Error processing text data: {e}")
-
-    print("Text emotions:", text_emotions)
-
-    conversation_history.append({
-        "user_input": text_input,
-        "image_features": image_features,
-        "audio_emotion": audio_emotion,
-        "text_emotions": text_emotions
-    })
+
 
+    if conversation_history is not None:
+        conversation_history = conversation_history[-max_history_length:]  # Keep most recent entries
+        conversation_history.append({
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        })
+    else:
+        conversation_history = [{
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        }]
+
     prompt = "User said: " + text_input
     if image_features:
         prompt += "\nImage features: " + ', '.join(image_features)
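Aside on the trimming logic: the new guard slices to the last max_history_length entries and then appends, so up to max_history_length + 1 turns actually feed the prompt. A bounded deque is a common alternative that keeps the cap implicit; a runnable sketch of that assumed alternative (not what the commit does):

    from collections import deque

    # Hypothetical alternative to the manual slice-then-append: a bounded
    # deque drops the oldest turn automatically once maxlen is reached.
    max_history_length = 3
    conversation_history = deque(maxlen=max_history_length)

    for i in range(5):
        conversation_history.append({"user_input": f"turn {i}"})

    print(list(conversation_history))  # keeps only the 3 most recent turns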
@@ -135,9 +147,9 @@ def process_input(video_stream):
     if text_emotions:
         prompt += "\nText emotions: " + text_emotions
 
-    print("image_feature",image_features)
-    print("Audio",audio_emotion)
-    print("text emotions",text_emotions)
+
+    # Get conversation history text
+    history_text = display_history(conversation_history)
 
     chat_completion = client.chat.completions.create(
         messages=[
@@ -145,7 +157,10 @@ def process_input(video_stream):
             "content": "As a mental health therapist, you're speaking to a user who is seeking guidance and support. They may be experiencing various challenges and are looking for solutions to improve their mental well-being. Your responses should be empathetic, supportive, and offer practical advice tailored to the user's specific issues. Remember to maintain a positive and non-judgmental tone throughout the interaction."
         },
         {"role": "user",
-         "content": prompt
+         "content": prompt+history_text
+        },
+        {"role": "assistant",
+         "content": history_text
         }
     ],
     model="llama3-70b-8192",
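As committed, history_text is embedded in the user content and then repeated verbatim as an assistant message, so the model sees the history twice (and an empty assistant turn on the first call). Chat-completion APIs generally take prior turns as alternating user/assistant messages instead; a minimal sketch under that assumption, reusing names from this file (system_prompt stands for the therapist instructions above):

    # Hypothetical restructuring (not in this commit): replay each stored
    # turn as its own chat message instead of pasting display_history()'s
    # text into both the user prompt and an assistant message.
    messages = [{"role": "system", "content": system_prompt}]
    for turn in conversation_history:
        if "user_input" in turn:
            messages.append({"role": "user", "content": turn["user_input"]})
        if "ai_response" in turn:
            messages.append({"role": "assistant", "content": turn["ai_response"]})
    messages.append({"role": "user", "content": prompt})

    chat_completion = client.chat.completions.create(
        messages=messages,
        model="llama3-70b-8192",
    )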
@@ -165,26 +180,46 @@ def process_input(video_stream):
     audio_file_path = "/tmp/ai_response.wav"
     tts.save(audio_file_path)
 
-    return ai_response,audio_file_path,display_history()  # Return the generated response
+    return ai_response,audio_file_path,conversation_history
 
-def display_history():
+def display_history(conversation_history):
     history_str = ""
     for i, turn in enumerate(conversation_history):
-        # if "user_input" in turn:
-        #     history_str += f"User: {turn['user_input']}\n"
+        if "user_input" in turn:
+            history_str += f"User: {turn['user_input']}\n"
         if "ai_response" in turn:
-            history_str += f"{turn['ai_response']}\n\n"
-    return history_str
+            history_str += f"Therapist: {turn['ai_response']}\n\n"
+    return history_str  # Truncate long histories for model input
+
 
 # Create the Gradio interface
-input_video = gr.Video(sources="webcam",label="Your Video", include_audio=True)
+input_video = gr.Video(sources="webcam", label="Your Video", include_audio=True)
 output_text = gr.Textbox(label="Therapist Response")
-output_audio=gr.Audio(autoplay=True,visible=False)
-history_text = gr.Textbox(display_history(), label="Conversation History", placeholder="")
 
+output_audio = gr.Audio(autoplay=True, visible=False)
 
-
-iface = gr.Interface(fn=process_input, inputs=input_video, outputs=[output_text,output_audio,history_text], title="Mental Health Therapist", description="Speak to the AI through video input and get responses.",theme=gr.themes.Default(primary_hue="teal", secondary_hue="cyan"),allow_flagging="auto")
-iface.launch(debug=True)
+custom_css = """
+gr.Interface .gradio-title {
+    text-align: center;
+    font-size: 24px;
+    font-weight: bold;
+    margin-left: 123px;
+}
+
+gr.Interface .gradio-description {
+    text-align: center;
+    font-size: 16px;
+    margin-top: 10px;
+}
+"""
+
+description = """
+Speak to the AI through video input and get personalized responses from our mental health therapist. Whether you need guidance, support, or just someone to talk to, our AI is here to help you navigate life's challenges with empathy and understanding.
+"""
+iface = gr.Interface(fn=process_input, inputs=input_video, outputs=[output_text, output_audio], title="MindWave: Real-Time Mental Health Therapist through GenAI and Multimodal Interaction", description=description, theme=gr.themes.Default(primary_hue="teal", secondary_hue="cyan"), allow_flagging=False, css=custom_css)
+
+iface.launch()
 
 
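One wiring gap worth noting in the final hunk: the new process_input takes conversation_history as a second argument and returns it as a third value, but the gr.Interface still passes a single video input and lists only two outputs, so the history never round-trips between turns. Also, recent Gradio releases expect allow_flagging to be the string "never" rather than False. A minimal sketch of one way to close both gaps with gr.State (hypothetical wiring, not part of this commit; component names as in this file):

    # Hypothetical wiring (not in this commit): a gr.State slot carries
    # conversation_history across turns, matching process_input's extra
    # argument and third return value.
    history_state = gr.State([])

    iface = gr.Interface(
        fn=process_input,
        inputs=[input_video, history_state],
        outputs=[output_text, output_audio, history_state],
        title="MindWave: Real-Time Mental Health Therapist through GenAI and Multimodal Interaction",
        description=description,
        theme=gr.themes.Default(primary_hue="teal", secondary_hue="cyan"),
        allow_flagging="never",
        css=custom_css,
    )
    iface.launch()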