Rausda6 committed on
Commit
cda1077
·
verified ·
1 Parent(s): 139ac7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -121
app.py CHANGED
@@ -9,139 +9,132 @@ import os
9
  import time
10
  import mimetypes
11
  from typing import List, Dict
12
- from transformers import AutoTokenizer, AutoModelForCausalLM # Replaced gemini
13
- import torch # Replaced gemini
 
 
 
 
 
14
 
15
  # Constants
16
  MAX_FILE_SIZE_MB = 20
17
  MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes
18
 
19
- # Model setup
20
- MODEL_ID = "tabularisai/german-gemma-3-1b-it"
21
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
22
- model = AutoModelForCausalLM.from_pretrained(
23
- MODEL_ID,
24
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
25
- device_map="auto",
26
- ).eval()
27
-
28
 
29
  class PodcastGenerator:
30
  def __init__(self):
31
- pass
32
-
33
- async def generate_script(self, prompt: str, language: str, api_key: str, file_obj=None, progress=None) -> Dict:
 
 
 
 
 
 
 
 
 
 
 
 
34
  example = """
35
  {
36
- "topic": "AGI",
37
- "podcast": [
38
- {
39
- "speaker": "Alex",
40
- "text": "Hallo und willkommen zur heutigen Folge über AGI."
41
- },
42
- {
43
- "speaker": "Ben",
44
- "text": "Danke Alex! Wir sprechen heute über Artificial General Intelligence und ihre möglichen Auswirkungen."
45
- }
46
- ]
47
  }
48
  """
49
- full_prompt = (
50
- f"Lies den Inhalt und fasse ihn in einem kurzen Podcast-Drehbuch in folgender JSON-Struktur zusammen:\n"
51
- f"{example}\n\n"
52
- f"Inhalt:\n{prompt}"
53
- )
54
 
55
- inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
56
- output = model.generate(**inputs, max_new_tokens=1024, do_sample=True)
57
- script_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  try:
60
- podcast_json = json.loads(script_text)
61
- except json.JSONDecodeError:
62
- return {"error": "Fehler beim Parsen der Antwort: Kein gültiges JSON erkannt."}
63
-
64
- return podcast_json
65
-
66
- async def text_to_audio(self, segments: List[Dict], language: str, progress=None) -> str:
67
- audio_segments = []
68
- temp_files = []
69
-
70
- for idx, segment in enumerate(segments):
71
- speaker = segment.get("speaker", "Sprecher")
72
- text = segment.get("text", "")
73
- tts_text = f"{speaker} sagt: {text}"
74
-
75
- filename = f"temp_{uuid.uuid4()}.mp3"
76
- communicate = edge_tts.Communicate(tts_text, "de-DE-KasperNeural")
77
- await communicate.save(filename)
78
-
79
- audio = AudioSegment.from_file(filename, format="mp3")
80
- audio_segments.append(audio)
81
- temp_files.append(filename)
82
-
83
- if progress:
84
- progress((idx + 1) / len(segments), f"Segment {idx+1} erstellt...")
85
-
86
- podcast = sum(audio_segments[1:], audio_segments[0])
87
- final_filename = f"podcast_{uuid.uuid4()}.mp3"
88
- podcast.export(final_filename, format="mp3")
89
-
90
- for f in temp_files:
91
- os.remove(f)
92
-
93
- return final_filename
94
-
95
-
96
- async def generate_podcast(prompt, language, api_key, file, progress=gr.Progress(track_tqdm=True)):
97
- gen = PodcastGenerator()
98
-
99
- text = ""
100
- if file is not None:
101
- if file.size > MAX_FILE_SIZE_BYTES:
102
- return "Datei zu groß. Maximal erlaubt sind 20 MB.", None
103
-
104
- mime_type, _ = mimetypes.guess_type(file.name)
105
- if not mime_type or not mime_type.endswith("pdf"):
106
- return "Nur PDF-Dateien sind erlaubt.", None
107
-
108
- async with aiofiles.open(file.name, mode="rb") as f:
109
- pdf_data = await f.read()
110
- import fitz # PyMuPDF
111
- with fitz.open(stream=pdf_data, filetype="pdf") as doc:
112
- text = "\n".join([page.get_text() for page in doc])
113
- else:
114
- text = prompt
115
-
116
- progress(0.1, "Erzeuge Skript...")
117
- script = await gen.generate_script(text, language, api_key, file_obj=file, progress=progress)
118
-
119
- if "error" in script:
120
- return script["error"], None
121
-
122
- progress(0.5, "Erzeuge Audio...")
123
- podcast_path = await gen.text_to_audio(script["podcast"], language, progress=progress)
124
-
125
- progress(1.0, "Fertig!")
126
- return "Podcast erfolgreich erstellt!", podcast_path
127
-
128
-
129
- demo = gr.Interface(
130
- fn=generate_podcast,
131
- inputs=[
132
- gr.Textbox(label="Thema oder Text", lines=5, placeholder="Worum soll es im Podcast gehen?"),
133
- gr.Radio(["de"], label="Sprache", value="de"),
134
- gr.Textbox(label="API Key (nicht benötigt für dieses Modell)", type="password", placeholder=""),
135
- gr.File(label="Optional: PDF-Datei hochladen", file_types=[".pdf"])
136
- ],
137
- outputs=[
138
- gr.Textbox(label="Status"),
139
- gr.Audio(label="Erstellter Podcast", type="filepath")
140
- ],
141
- title="Podcast Generator (German-Gemma)",
142
- description="Erstelle Podcasts aus Text oder PDFs mithilfe eines KI-Modells. Nutzt das Modell 'tabularisai/german-gemma-3-1b-it'.",
143
- allow_flagging="never"
144
- )
145
-
146
- if __name__ == "__main__":
147
- demo.launch()
 
9
  import time
10
  import mimetypes
11
  from typing import List, Dict
12
+
13
+ # NEW – Hugging Face Transformers
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM
15
+ import torch
16
+
17
+ # NEW – external model id
18
+ MODEL_ID = "tabularisai/german-gemma-3-1b-it"
19
 
20
  # Constants
21
  MAX_FILE_SIZE_MB = 20
22
  MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes
23
 
 
 
 
 
 
 
 
 
 
24
 
25
class PodcastGenerator:
    """Generate a two-speaker podcast script with a local Hugging Face causal LM.

    The tokenizer and model identified by ``MODEL_ID`` are loaded once per
    process and shared by every instance, so creating a generator per request
    does not reload the weights.
    """

    # Process-wide cache of the loaded tokenizer/model (filled on first use).
    _tokenizer_cache = None
    _model_cache = None

    def __init__(self):
        """Attach the shared tokenizer and model, loading them on first use."""
        if PodcastGenerator._model_cache is None:
            PodcastGenerator._tokenizer_cache = AutoTokenizer.from_pretrained(MODEL_ID)
            PodcastGenerator._model_cache = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto",
            ).eval()
        self.tokenizer = PodcastGenerator._tokenizer_cache
        self.model = PodcastGenerator._model_cache

    async def generate_script(
        self,
        prompt: str,
        language: str,
        api_key: str,
        file_obj=None,
        progress=None,
    ) -> Dict:
        """Generate a podcast script and return it as a parsed JSON object.

        Args:
            prompt: Free-text user input; may be empty when a file is given.
            language: Target language name, or ``"Auto Detect"`` to follow the
                language of the input.
            api_key: Unused by the local model; kept so existing callers that
                pass it keep working.
            file_obj: Optional uploaded file. It is size-checked, and the
                content of plain-text files is appended to the prompt.
            progress: Optional callable ``progress(fraction, message)``.

        Returns:
            The script parsed from the model's JSON answer.

        Raises:
            RuntimeError: If text generation fails (original error is chained).
            json.JSONDecodeError: If the model's answer contains no valid JSON.
            ValueError: If the uploaded file exceeds the size limit.
        """
        # Local import: the file's top-level import block is not modified here.
        import asyncio

        example = """
{
    "topic": "AGI",
    "podcast": [
        {
            "speaker": 2,
            "line": "So, AGI, huh? Seems like everyone's talking about it these days."
        },
        {
            "speaker": 1,
            "line": "Yeah, it's definitely having a moment, isn't it?"
        }
    ]
}
"""

        if language == "Auto Detect":
            language_instruction = (
                "- The podcast MUST be in the same language as the user input."
            )
        else:
            language_instruction = f"- The podcast MUST be in {language} language"

        system_prompt = f"""
You are a professional podcast generator. Your task is to generate a professional podcast script based on the user input.
{language_instruction}
- The podcast should have 2 speakers.
- The podcast should be long.
- Do not use names for the speakers.
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
- The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
- The script must be in JSON format.

Follow this example structure:
{example}
"""

        if prompt and file_obj:
            user_prompt = (
                f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
            )
        elif prompt:
            user_prompt = (
                f"Please generate a podcast script based on the following user input:\n{prompt}"
            )
        else:
            user_prompt = "Please generate a podcast script based on the uploaded file."

        if file_obj:
            # Reading also enforces the upload size limit.
            file_bytes = await self._read_file_bytes(file_obj)
            file_name = str(getattr(file_obj, "name", "") or "")
            if self._get_mime_type(file_name) == "text/plain":
                # The text-only model never sees the upload unless its content
                # is part of the prompt.
                if isinstance(file_bytes, bytes):
                    file_text = file_bytes.decode("utf-8", errors="replace")
                else:
                    file_text = str(file_bytes)
                user_prompt += f"\n\nUploaded file content:\n{file_text}"
            # NOTE(review): other formats (e.g. PDF) need a text-extraction
            # step before their content can be given to the model; until then
            # only the size check is applied to them.

        if progress:
            progress(0.3, "Generating podcast script...")

        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        loop = asyncio.get_running_loop()
        try:
            # Generation is blocking and slow; run it in a worker thread so the
            # event loop stays responsive.
            response_text = await loop.run_in_executor(
                None, self._generate_text, full_prompt
            )
        except Exception as e:
            raise RuntimeError(f"Failed to generate podcast script: {e}") from e

        print(f"Generated podcast script:\n{response_text}")

        if progress:
            progress(0.4, "Script generated successfully!")

        return self._extract_json(response_text)

    def _generate_text(self, full_prompt: str, max_new_tokens: int = 2048) -> str:
        """Run the model on ``full_prompt`` and return only the newly generated text."""
        inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]
        with torch.inference_mode():
            output = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                # temperature only takes effect when sampling is enabled.
                do_sample=True,
                temperature=1.0,
            )
        # A causal LM returns prompt tokens followed by the continuation; drop
        # the prompt so only the answer is decoded and parsed.
        return self.tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        )

    @staticmethod
    def _extract_json(text: str) -> Dict:
        """Parse the outermost ``{...}`` object found in ``text``.

        Tolerates code fences or prose around the JSON. When no braces are
        found the whole text is parsed, so failures still surface as
        ``json.JSONDecodeError``.
        """
        start = text.find("{")
        end = text.rfind("}")
        candidate = text[start:end + 1] if 0 <= start < end else text
        return json.loads(candidate)

    async def _read_file_bytes(self, file_obj) -> bytes:
        """Return the content of ``file_obj`` after enforcing the size limit.

        Args:
            file_obj: Object exposing ``size`` and/or ``name`` (a filesystem
                path), and optionally ``read()``.

        Raises:
            ValueError: If the file is larger than ``MAX_FILE_SIZE_MB``.
        """
        file_size = getattr(file_obj, "size", None)
        if file_size is None:
            # No usable size attribute: fall back to the size on disk.
            file_size = os.path.getsize(file_obj.name)

        if file_size > MAX_FILE_SIZE_BYTES:
            raise ValueError(
                f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file."
            )

        if hasattr(file_obj, "read"):
            return file_obj.read()

        async with aiofiles.open(file_obj.name, "rb") as f:
            return await f.read()

    @staticmethod
    def _get_mime_type(filename: str) -> str:
        """Return the MIME type for ``filename`` based on its extension.

        ``.pdf`` and ``.txt`` are mapped explicitly; anything else is guessed
        with ``mimetypes`` and defaults to ``application/octet-stream``.
        """
        ext = os.path.splitext(filename)[1].lower()
        if ext == ".pdf":
            return "application/pdf"
        if ext == ".txt":
            return "text/plain"
        mime_type, _ = mimetypes.guess_type(filename)
        return mime_type or "application/octet-stream"