ayatnourhachmi committed on
Commit
e8c4e1d
·
1 Parent(s): aad2e7d

Integrate Google Gemini API for conversation generation

Browse files
Files changed (2) hide show
  1. app.py +86 -41
  2. requirements.txt +0 -0
app.py CHANGED
@@ -1,57 +1,102 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  from gradio_client import Client, handle_file
4
  import tempfile
5
  import os
6
 
 
 
 
 
 
7
  # Load TTS client - using the correct Space URL
8
  tts_client = Client("https://medmac01-darija-arabic-tts.hf.space/")
9
 
10
- # Load text generation client
11
- text_client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
12
-
13
  def generate_conversation(subject, speaker1_audio, speaker2_audio):
14
- prompt = f"""
15
- Generate a natural Moroccan Darija conversation between two people about: "{subject}".
16
- Format:
17
- Speaker 1: ...
18
- Speaker 2: ...
19
- Speaker 1: ...
20
- Speaker 2: ...
21
- Keep it short and casual (4 lines).
22
- """
 
 
 
 
 
 
 
 
23
 
24
- result = text_client.text_generation(prompt, max_new_tokens=300, temperature=0.7)
25
- lines = [line.strip() for line in result.split('\n') if line.strip().startswith("Speaker")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Generate audio files using TTS
28
- audio_paths = []
29
- idx = 0
30
- for line in lines:
31
- speaker_audio = speaker1_audio if line.startswith("Speaker 1") else speaker2_audio
32
- text = line.split(":", 1)[1].strip()
33
 
34
- # Create TTS audio using the correct API call
35
- result = tts_client.predict(
36
- text=text,
37
- speaker_audio_path=handle_file(speaker_audio),
38
- temperature=0.75,
39
- api_name="/infer_EGTTS"
40
- )
41
-
42
- # Save the result to a temporary file
43
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
44
- with open(result, "rb") as f:
45
- tmp.write(f.read())
46
- tmp.flush()
47
- audio_paths.append(tmp.name)
48
- idx += 1
49
 
50
- # Format the conversation text
51
- conversation_text = "\n".join(lines)
52
-
53
- # Return all outputs in the correct order
54
- return [conversation_text] + audio_paths
 
 
 
 
 
 
 
 
55
 
56
  with gr.Blocks() as demo:
57
  gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")
 
1
  import gradio as gr
2
+ import google.generativeai as genai
3
  from gradio_client import Client, handle_file
4
  import tempfile
5
  import os
6
 
7
# Configure Gemini API.
# SECURITY: the key is read from the environment instead of being hard-coded,
# so the secret never lands in version control. Any key that was previously
# committed in this file should be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail fast with an actionable message rather than surfacing an opaque
    # authentication error on the first user request.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it as an environment variable "
        "(e.g. a Space secret) before starting the app."
    )
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash')
11
+
12
  # Load TTS client - using the correct Space URL
13
  tts_client = Client("https://medmac01-darija-arabic-tts.hf.space/")
14
 
 
 
 
15
  def generate_conversation(subject, speaker1_audio, speaker2_audio):
16
+ try:
17
+ prompt = f"""
18
+ Generate a natural Moroccan Darija conversation in Arabic script only between two people about: "{subject}".
19
+ Rules:
20
+ - Use only Arabic script for Darija
21
+ - Do not include any transliterations or translations
22
+ - Do not include any Latin characters or parentheses
23
+ - Use "Speaker 1" and "Speaker 2" as the speaker names
24
+
25
+ Format:
26
+ Speaker 1: [Arabic Darija text only]
27
+ Speaker 2: [Arabic Darija text only]
28
+ Speaker 1: [Arabic Darija text only]
29
+ Speaker 2: [Arabic Darija text only]
30
+
31
+ Keep it short and casual (4 lines).
32
+ """
33
 
34
+ print("Sending prompt to Gemini API...")
35
+ response = model.generate_content(prompt)
36
+ print(f"Gemini API Response: {response}")
37
+
38
+ if not response or not response.text:
39
+ print("No response text received from Gemini API")
40
+ return ["Error: No response from the model"] + [None] * 4
41
+
42
+ result = response.text
43
+ print(f"Generated text: {result}")
44
+
45
+ # Split the text into lines and process each line
46
+ lines = []
47
+ for line in result.split('\n'):
48
+ line = line.strip()
49
+ if ':' in line:
50
+ # Extract the text after the colon
51
+ text = line.split(':', 1)[1].strip()
52
+ # Add the appropriate speaker prefix
53
+ if len(lines) % 2 == 0:
54
+ lines.append(f"Speaker 1: {text}")
55
+ else:
56
+ lines.append(f"Speaker 2: {text}")
57
+
58
+ print(f"Processed lines: {lines}")
59
+
60
+ if not lines:
61
+ print("No valid lines found in the response")
62
+ return ["Error: No valid conversation generated"] + [None] * 4
63
 
64
+ # Generate audio files using TTS
65
+ audio_paths = []
66
+ idx = 0
67
+ for line in lines:
68
+ speaker_audio = speaker1_audio if line.startswith("Speaker 1") else speaker2_audio
69
+ text = line.split(":", 1)[1].strip()
70
 
71
+ # Create TTS audio using the correct API call
72
+ result = tts_client.predict(
73
+ text=text,
74
+ speaker_audio_path=handle_file(speaker_audio),
75
+ temperature=0.75,
76
+ api_name="/infer_EGTTS"
77
+ )
78
+
79
+ # Save the result to a temporary file
80
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
81
+ with open(result, "rb") as f:
82
+ tmp.write(f.read())
83
+ tmp.flush()
84
+ audio_paths.append(tmp.name)
85
+ idx += 1
86
 
87
+ # Format the conversation text
88
+ conversation_text = "\n".join(lines)
89
+
90
+ # Ensure we have exactly 4 audio paths
91
+ while len(audio_paths) < 4:
92
+ audio_paths.append(None)
93
+
94
+ # Return all outputs in the correct order
95
+ return [conversation_text] + audio_paths[:4]
96
+
97
+ except Exception as e:
98
+ print(f"Error occurred: {str(e)}")
99
+ return [f"Error: {str(e)}"] + [None] * 4
100
 
101
  with gr.Blocks() as demo:
102
  gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ