Update app.py
app.py CHANGED
@@ -9,7 +9,6 @@ import moviepy.editor as mp
 from gtts import gTTS
 from groq import Groq
 
-
 client = Groq(
     api_key="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",
 )
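Note on the unchanged client setup: the Groq API key is committed in plain text, so it is part of the repository history and should be treated as compromised and rotated. A minimal sketch of reading it from the environment instead (GROQ_API_KEY is an assumed variable name, not something this commit defines):

    import os
    from groq import Groq

    # Assumed env var name; set it outside the repo, e.g. in Space secrets.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is not set")
    client = Groq(api_key=api_key)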
@@ -20,8 +19,9 @@ audio_pipeline = pipeline("audio-classification", model="audeering/wav2vec2-larg
 text_pipeline = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=2)
 
 conversation_history = []
+max_history_length = 3
 
-def process_input(video_stream):
+def process_input(video_stream, conversation_history):
     if isinstance(video_stream, str):
         video_file_path = video_stream
 
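The new max_history_length cap is enforced by manual slicing inside process_input (next hunk). A collections.deque with maxlen would give the same bound without per-call slicing; a sketch of the alternative, not what this commit does:

    from collections import deque

    # maxlen drops the oldest turn automatically on append.
    conversation_history = deque(maxlen=3)
    conversation_history.append({"user_input": "hello"})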
@@ -117,16 +117,28 @@ def process_input(video_stream):
 
     except Exception as e:
         print(f"Error processing text data: {e}")
-
-
-
-    conversation_history.append({
-        "user_input": text_input,
-        "image_features": image_features,
-        "audio_emotion": audio_emotion,
-        "text_emotions": text_emotions
-    })
+
+
+
+
+
+
+    if conversation_history is not None:
+        conversation_history = conversation_history[-max_history_length:] # Keep most recent entries
+        conversation_history.append({
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        })
+    else:
+        conversation_history = [{
+            "user_input": text_input,
+            "image_features": image_features,
+            "audio_emotion": audio_emotion,
+            "text_emotions": text_emotions
+        }]
 
     prompt = "User said: " + text_input
     if image_features:
         prompt += "\nImage features: " + ', '.join(image_features)
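Two things worth noting in this hunk: both branches append the same dict, and the slice runs before the append, so the list can end a call holding max_history_length + 1 entries. A more compact equivalent sketch using the same names:

    # Treat None and [] the same, append, then trim once.
    conversation_history = list(conversation_history or [])
    conversation_history.append({
        "user_input": text_input,
        "image_features": image_features,
        "audio_emotion": audio_emotion,
        "text_emotions": text_emotions,
    })
    # Trim after appending so the cap includes the newest turn.
    conversation_history = conversation_history[-max_history_length:]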
@@ -135,9 +147,9 @@ def process_input(video_stream):
     if text_emotions:
         prompt += "\nText emotions: " + text_emotions
 
-
-
-
+
+    # Get conversation history text
+    history_text = display_history(conversation_history)
 
     chat_completion = client.chat.completions.create(
         messages=[
@@ -145,7 +157,10 @@ def process_input(video_stream):
             "content": "As a mental health therapist, you're speaking to a user who is seeking guidance and support. They may be experiencing various challenges and are looking for solutions to improve their mental well-being. Your responses should be empathetic, supportive, and offer practical advice tailored to the user's specific issues. Remember to maintain a positive and non-judgmental tone throughout the interaction."
             },
             {"role": "user",
-            "content": prompt
+            "content": prompt+history_text
+            },
+            {"role": "assistant",
+            "content": history_text
             }
         ],
         model="llama3-70b-8192",
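With this change the history is sent twice: once concatenated into the user prompt (prompt+history_text) and again as a single assistant message. Chat-completion APIs generally expect prior turns as alternating user/assistant messages; a sketch of that shape, assuming system_prompt holds the therapist instruction above:

    messages = [{"role": "system", "content": system_prompt}]
    for turn in conversation_history:
        messages.append({"role": "user", "content": turn["user_input"]})
        if "ai_response" in turn:
            messages.append({"role": "assistant", "content": turn["ai_response"]})
    # The current utterance goes last, once, as the newest user turn.
    messages.append({"role": "user", "content": prompt})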
@@ -165,26 +180,46 @@ def process_input(video_stream):
     audio_file_path = "/tmp/ai_response.wav"
     tts.save(audio_file_path)
 
-    return ai_response,audio_file_path,
+    return ai_response,audio_file_path,conversation_history
 
-def display_history():
+def display_history(conversation_history):
     history_str = ""
     for i, turn in enumerate(conversation_history):
-
-
+        if "user_input" in turn:
+            history_str += f"User: {turn['user_input']}\n"
         if "ai_response" in turn:
-            history_str += f"{turn['ai_response']}\n\n"
-    return history_str
+            history_str += f"Therapist: {turn['ai_response']}\n\n"
+    return history_str # Truncate long histories for model input
+
 
 # Create the Gradio interface
-input_video = gr.Video(sources="webcam",label="Your Video", include_audio=True)
+input_video = gr.Video(sources="webcam", label="Your Video", include_audio=True)
 output_text = gr.Textbox(label="Therapist Response")
-output_audio=gr.Audio(autoplay=True,visible=False)
-history_text = gr.Textbox(display_history(), label="Conversation History", placeholder="")
+output_audio = gr.Audio(autoplay=True, visible=False)
 
 
-
-
-
+custom_css = """
+gr.Interface .gradio-title{
+    text-align: center;
+    font-size: 24px;
+    font-weight: bold;
+    margin-left:123px;
+}
+
+gr.Interface .gradio-description {
+    text-align: center;
+    font-size: 16px;
+    margin-top: 10px;
+}
+
+
+
+"""
+
+description = """
+Speak to the AI through video input and get personalized responses from our mental health therapist. Whether you need guidance, support, or just someone to talk to, our AI is here to help you navigate life's challenges with empathy and understanding.
+"""
+iface = gr.Interface(fn=process_input, inputs=input_video, outputs=[output_text, output_audio], title="MindWave: Real-Time Mental Health Therapist through GenAI and Multimodal Interaction", description=description, theme=gr.themes.Default(primary_hue="teal", secondary_hue="cyan"), allow_flagging=False,css=custom_css)
 
 
+iface.launch()
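An aside on the unchanged TTS lines in this hunk: gTTS always produces MP3 audio, so saving to /tmp/ai_response.wav writes MP3 bytes under a .wav name. Browsers usually sniff the real format, but a matching extension is safer:

    from gtts import gTTS

    tts = gTTS("example response")            # gTTS output is MP3-encoded
    audio_file_path = "/tmp/ai_response.mp3"  # extension matches the format
    tts.save(audio_file_path)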
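One mismatch this commit introduces: process_input now takes (video_stream, conversation_history) and returns three values, but the interface still declares a single input and two outputs, so Gradio will call the function with the wrong number of arguments. The usual fix is a gr.State component listed in both inputs and outputs; a minimal sketch under that assumption:

    import gradio as gr

    # gr.State persists conversation_history across calls within a session;
    # it must appear in both inputs and outputs to match the signature.
    history_state = gr.State([])
    iface = gr.Interface(
        fn=process_input,
        inputs=[input_video, history_state],
        outputs=[output_text, output_audio, history_state],
    )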