Commit e8c4e1d (1 parent: aad2e7d)
Integrate Google Gemini API for conversation generation

Files changed:
- app.py (+86, -41)
- requirements.txt (+0, -0; binary change)
app.py (CHANGED)
@@ -1,57 +1,102 @@
 import gradio as gr
-(one import removed here; its text is not captured in the extracted diff)
+import google.generativeai as genai
 from gradio_client import Client, handle_file
 import tempfile
 import os

+# Configure Gemini API
+GOOGLE_API_KEY = "AIzaSyBN_GLXq2EGdZx9BXMdH58X2ida_y3Ea_M"
+genai.configure(api_key=GOOGLE_API_KEY)
+model = genai.GenerativeModel('gemini-2.0-flash')
+
 # Load TTS client - using the correct Space URL
 tts_client = Client("https://medmac01-darija-arabic-tts.hf.space/")

-# Load text generation client
-text_client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
-
 def generate_conversation(subject, speaker1_audio, speaker2_audio):
-    (the previous body of generate_conversation is removed here; its text is not captured in the extracted diff)
+    try:
+        prompt = f"""
+        Generate a natural Moroccan Darija conversation in Arabic script only between two people about: "{subject}".
+        Rules:
+        - Use only Arabic script for Darija
+        - Do not include any transliterations or translations
+        - Do not include any Latin characters or parentheses
+        - Use "Speaker 1" and "Speaker 2" as the speaker names
+
+        Format:
+        Speaker 1: [Arabic Darija text only]
+        Speaker 2: [Arabic Darija text only]
+        Speaker 1: [Arabic Darija text only]
+        Speaker 2: [Arabic Darija text only]
+
+        Keep it short and casual (4 lines).
+        """

+        print("Sending prompt to Gemini API...")
+        response = model.generate_content(prompt)
+        print(f"Gemini API Response: {response}")
+
+        if not response or not response.text:
+            print("No response text received from Gemini API")
+            return ["Error: No response from the model"] + [None] * 4
+
+        result = response.text
+        print(f"Generated text: {result}")
+
+        # Split the text into lines and process each line
+        lines = []
+        for line in result.split('\n'):
+            line = line.strip()
+            if ':' in line:
+                # Extract the text after the colon
+                text = line.split(':', 1)[1].strip()
+                # Add the appropriate speaker prefix
+                if len(lines) % 2 == 0:
+                    lines.append(f"Speaker 1: {text}")
+                else:
+                    lines.append(f"Speaker 2: {text}")
+
+        print(f"Processed lines: {lines}")
+
+        if not lines:
+            print("No valid lines found in the response")
+            return ["Error: No valid conversation generated"] + [None] * 4

+        # Generate audio files using TTS
+        audio_paths = []
+        idx = 0
+        for line in lines:
+            speaker_audio = speaker1_audio if line.startswith("Speaker 1") else speaker2_audio
+            text = line.split(":", 1)[1].strip()

+            # Create TTS audio using the correct API call
+            result = tts_client.predict(
+                text=text,
+                speaker_audio_path=handle_file(speaker_audio),
+                temperature=0.75,
+                api_name="/infer_EGTTS"
+            )
+
+            # Save the result to a temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                with open(result, "rb") as f:
+                    tmp.write(f.read())
+                tmp.flush()
+                audio_paths.append(tmp.name)
+            idx += 1

+        # Format the conversation text
+        conversation_text = "\n".join(lines)
+
+        # Ensure we have exactly 4 audio paths
+        while len(audio_paths) < 4:
+            audio_paths.append(None)
+
+        # Return all outputs in the correct order
+        return [conversation_text] + audio_paths[:4]
+
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        return [f"Error: {str(e)}"] + [None] * 4

 with gr.Blocks() as demo:
     gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")
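Note that the new code commits the Gemini API key as a literal string in app.py. A minimal alternative sketch, assuming the key is instead provided as an environment variable (for example a Hugging Face Space secret named GOOGLE_API_KEY; the secret name is an assumption, not something the commit defines):

import os
import google.generativeai as genai

# Read the key from the environment (e.g. a Space secret) rather than hardcoding it.
# The environment variable name GOOGLE_API_KEY is an assumption for illustration.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash')

Spaces expose secrets configured in the Space settings to the running app as environment variables, so the rest of app.py would not need to change.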
requirements.txt (CHANGED)
Binary files a/requirements.txt and b/requirements.txt differ
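The requirements.txt change is recorded as a binary diff, so its new contents are not visible here. The added import google.generativeai as genai does, however, require the google-generativeai package at runtime, so the dependency list presumably now includes something like the following (the exact names and any version pins are a guess, not taken from the file):

gradio
gradio_client
google-generativeai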
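The app.py diff ends inside the gr.Blocks() section, so the rest of the UI is not shown. Given that generate_conversation returns a conversation string followed by four audio paths, the demo presumably wires inputs and outputs along the lines of the sketch below; every component and variable name in it is an assumption for illustration, not part of the commit:

with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")

    # Hypothetical inputs: a topic plus a reference voice clip for each speaker.
    subject = gr.Textbox(label="Conversation subject")
    speaker1_audio = gr.Audio(label="Speaker 1 reference voice", type="filepath")
    speaker2_audio = gr.Audio(label="Speaker 2 reference voice", type="filepath")

    # Hypothetical outputs: the generated text plus one audio clip per conversation line.
    conversation = gr.Textbox(label="Generated conversation")
    line_audios = [gr.Audio(label=f"Line {i + 1}") for i in range(4)]

    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate_conversation,
        inputs=[subject, speaker1_audio, speaker2_audio],
        outputs=[conversation] + line_audios,
    )

demo.launch()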