Update app.py
app.py CHANGED
@@ -9,7 +9,6 @@ import moviepy.editor as mp
 from gtts import gTTS
 from groq import Groq
 
-
 client = Groq(
     api_key="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",
 )
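Note on the unchanged client setup: the Groq API key is committed in plain text, so it is part of the repository history and should be treated as compromised and rotated. A minimal sketch of reading it from the environment instead (GROQ_API_KEY is an assumed variable name, not something this commit defines):

    import os
    from groq import Groq

    # Assumed env var name; set it outside the repo, e.g. in Space secrets.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is not set")
    client = Groq(api_key=api_key)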
@@ -20,8 +19,9 @@ audio_pipeline = pipeline("audio-classification", model="audeering/wav2vec2-larg
 text_pipeline = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=2)
 
 conversation_history = []
+max_history_length = 3
 
-def process_input(video_stream):
+def process_input(video_stream, conversation_history):
     if isinstance(video_stream, str):
         video_file_path = video_stream
 
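The new max_history_length cap is enforced by manual slicing inside process_input (next hunk). A collections.deque with maxlen would give the same bound without per-call slicing; a sketch of the alternative, not what this commit does:

    from collections import deque

    # maxlen drops the oldest turn automatically on append.
    conversation_history = deque(maxlen=3)
    conversation_history.append({"user_input": "hello"})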
@@ -117,16 +117,28 @@ def process_input(video_stream):
 
     except Exception as e:
         print(f"Error processing text data: {e}")
-
-
-
-    conversation_history.append({
-        "user_input": text_input,
-        "image_features": image_features,
-        "audio_emotion": audio_emotion,
-        "text_emotions": text_emotions
-    })
+
+
+
+
+
+
+    if conversation_history is not None:
+        conversation_history = conversation_history[-max_history_length:] # Keep most recent entries
+        conversation_history.append({
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        })
+    else:
+        conversation_history = [{
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        }]
 
     prompt = "User said: " + text_input
     if image_features:
         prompt += "\nImage features: " + ', '.join(image_features)
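Two things worth noting in this hunk: both branches append the same dict, and the slice runs before the append, so the list can end a call holding max_history_length + 1 entries. A more compact equivalent sketch using the same names:

    # Treat None and [] the same, append, then trim once.
    conversation_history = list(conversation_history or [])
    conversation_history.append({
        "user_input": text_input,
        "image_features": image_features,
        "audio_emotion": audio_emotion,
        "text_emotions": text_emotions,
    })
    # Trim after appending so the cap includes the newest turn.
    conversation_history = conversation_history[-max_history_length:]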
@@ -135,9 +147,9 @@ def process_input(video_stream):
     if text_emotions:
         prompt += "\nText emotions: " + text_emotions
 
-
-
-
+
+    # Get conversation history text
+    history_text = display_history(conversation_history)
 
     chat_completion = client.chat.completions.create(
         messages=[
@@ -145,7 +157,10 @@ def process_input(video_stream):
             "content": "As a mental health therapist, you're speaking to a user who is seeking guidance and support. They may be experiencing various challenges and are looking for solutions to improve their mental well-being. Your responses should be empathetic, supportive, and offer practical advice tailored to the user's specific issues. Remember to maintain a positive and non-judgmental tone throughout the interaction."
             },
             {"role": "user",
-            "content": prompt
+            "content": prompt+history_text
+            },
+            {"role": "assistant",
+            "content": history_text
             }
         ],
         model="llama3-70b-8192",
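With this change the history is sent twice: once concatenated into the user prompt (prompt+history_text) and again as a single assistant message. Chat-completion APIs generally expect prior turns as alternating user/assistant messages; a sketch of that shape, assuming system_prompt holds the therapist instruction above:

    messages = [{"role": "system", "content": system_prompt}]
    for turn in conversation_history:
        messages.append({"role": "user", "content": turn["user_input"]})
        if "ai_response" in turn:
            messages.append({"role": "assistant", "content": turn["ai_response"]})
    # The current utterance goes last, once, as the newest user turn.
    messages.append({"role": "user", "content": prompt})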
@@ -165,26 +180,46 @@ def process_input(video_stream):
     audio_file_path = "/tmp/ai_response.wav"
     tts.save(audio_file_path)
 
-    return ai_response,audio_file_path,
+    return ai_response,audio_file_path,conversation_history
 
-def display_history():
+def display_history(conversation_history):
     history_str = ""
     for i, turn in enumerate(conversation_history):
-
-
+        if "user_input" in turn:
+            history_str += f"User: {turn['user_input']}\n"
         if "ai_response" in turn:
-            history_str += f"{turn['ai_response']}\n\n"
-    return history_str
+            history_str += f"Therapist: {turn['ai_response']}\n\n"
+    return history_str # Truncate long histories for model input
+
 
 # Create the Gradio interface
-input_video = gr.Video(sources="webcam",label="Your Video", include_audio=True)
+input_video = gr.Video(sources="webcam", label="Your Video", include_audio=True)
 output_text = gr.Textbox(label="Therapist Response")
-output_audio=gr.Audio(autoplay=True,visible=False)
-history_text = gr.Textbox(display_history(), label="Conversation History", placeholder="")
+output_audio = gr.Audio(autoplay=True, visible=False)
 
 
-
-
-
+custom_css = """
+gr.Interface .gradio-title{
+    text-align: center;
+    font-size: 24px;
+    font-weight: bold;
+    margin-left:123px;
+}
+
+gr.Interface .gradio-description {
+    text-align: center;
+    font-size: 16px;
+    margin-top: 10px;
+}
+
+
+
+"""
+
+description = """
+Speak to the AI through video input and get personalized responses from our mental health therapist. Whether you need guidance, support, or just someone to talk to, our AI is here to help you navigate life's challenges with empathy and understanding.
+"""
+iface = gr.Interface(fn=process_input, inputs=input_video, outputs=[output_text, output_audio], title="MindWave: Real-Time Mental Health Therapist through GenAI and Multimodal Interaction", description=description, theme=gr.themes.Default(primary_hue="teal", secondary_hue="cyan"), allow_flagging=False,css=custom_css)
 
 
+iface.launch()
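An aside on the unchanged TTS lines in this hunk: gTTS always produces MP3 audio, so saving to /tmp/ai_response.wav writes MP3 bytes under a .wav name. Browsers usually sniff the real format, but a matching extension is safer:

    from gtts import gTTS

    tts = gTTS("example response")            # gTTS output is MP3-encoded
    audio_file_path = "/tmp/ai_response.mp3"  # extension matches the format
    tts.save(audio_file_path)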
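One mismatch this commit introduces: process_input now takes (video_stream, conversation_history) and returns three values, but the interface still declares a single input and two outputs, so Gradio will call the function with the wrong number of arguments. The usual fix is a gr.State component listed in both inputs and outputs; a minimal sketch under that assumption:

    import gradio as gr

    # gr.State persists conversation_history across calls within a session;
    # it must appear in both inputs and outputs to match the signature.
    history_state = gr.State([])
    iface = gr.Interface(
        fn=process_input,
        inputs=[input_video, history_state],
        outputs=[output_text, output_audio, history_state],
    )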