das1mtb56 committed on
Commit
b057493
·
verified ·
1 Parent(s): 0a4f03c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -41
app.py CHANGED
@@ -1,23 +1,34 @@
1
  import os
 
 
 
 
 
 
2
  import gradio as gr
3
  import yt_dlp
4
  import whisper
5
  from transformers import pipeline, MarianMTModel, MarianTokenizer
6
- import torch
7
- import shutil
8
- import warnings
9
 
10
- # Suppress harmless CPU warning from Whisper
11
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
12
 
13
- # Load models
14
- whisper_model = whisper.load_model("small")
15
- summarizer = pipeline("summarization", model="Falconsai/text_summarization")
16
- translation_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-mul-en")
17
- translation_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-mul-en")
18
 
 
 
 
 
19
  COOKIES_PATH = "cookies.txt"
20
 
 
 
 
 
 
 
21
  def save_cookies(file):
22
  if file:
23
  shutil.copy(file.name, COOKIES_PATH)
@@ -25,26 +36,29 @@ def save_cookies(file):
25
  return "⚠️ Please upload a valid cookies.txt file."
26
 
27
  def download_audio(youtube_url):
28
- output_file = "audio.webm"
29
  ydl_opts = {
30
  'format': 'bestaudio/best',
31
- 'outtmpl': output_file,
32
  'quiet': True,
33
  }
34
  if os.path.exists(COOKIES_PATH):
35
  ydl_opts['cookiefile'] = COOKIES_PATH
36
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
37
  ydl.download([youtube_url])
38
- return output_file
39
 
40
  def get_thumbnail(youtube_url):
41
- ydl_opts = {'quiet': True}
42
- if os.path.exists(COOKIES_PATH):
43
- ydl_opts['cookiefile'] = COOKIES_PATH
44
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
45
- info = ydl.extract_info(youtube_url, download=False)
46
- return info.get("thumbnail", "")
47
- return ""
 
 
 
48
 
49
  def translate_to_english(text):
50
  chunks = [text[i:i+500] for i in range(0, len(text), 500)]
@@ -56,23 +70,29 @@ def translate_to_english(text):
56
  return " ".join(translated)
57
 
58
  def process_video(url):
59
- audio_path = download_audio(url)
60
- result = whisper_model.transcribe(audio_path)
61
- transcription = result["text"]
 
62
 
63
- translated_text = translate_to_english(transcription)
64
- summary = summarizer(translated_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
65
- thumbnail_url = get_thumbnail(url)
66
 
67
- return transcription, translated_text, summary, thumbnail_url
 
 
 
 
 
68
 
69
  def download_summary(text):
70
- filename = "summary.txt"
71
  with open(filename, "w", encoding="utf-8") as f:
72
  f.write(text)
73
  return filename
74
 
75
- # UI
76
  with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Summarizer") as demo:
77
  gr.Markdown("## 🧠 Multilingual YouTube Summarizer")
78
  gr.Markdown("Upload a video link and get the transcript, English translation, and summary.")
@@ -103,16 +123,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Summarizer") as
103
  outputs=[transcript_output, translation_output, summary_output, video_thumb]
104
  )
105
 
106
- download_btn.click(
107
- fn=download_summary,
108
- inputs=[summary_output],
109
- outputs=[download_file]
110
- )
111
-
112
- upload_btn.click(
113
- fn=save_cookies,
114
- inputs=[cookies_file],
115
- outputs=[cookie_status]
116
- )
117
-
118
- demo.launch(share=True)
 
1
  import os
2
+ import tempfile
3
+ import uuid
4
+ import shutil
5
+ import warnings
6
+ import logging
7
+
8
  import gradio as gr
9
  import yt_dlp
10
  import whisper
11
  from transformers import pipeline, MarianMTModel, MarianTokenizer
 
 
 
12
 
13
+ # Suppress Whisper CPU warning
14
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
15
 
16
+ # Setup logging
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
 
 
19
 
20
+ # Constants
21
+ WHISPER_MODEL_SIZE = "small"
22
+ SUMMARIZER_MODEL_NAME = "Falconsai/text_summarization"
23
+ TRANSLATION_MODEL_NAME = "Helsinki-NLP/opus-mt-mul-en"
24
  COOKIES_PATH = "cookies.txt"
25
 
26
+ # Load models once at startup
27
+ whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
28
+ summarizer = pipeline("summarization", model=SUMMARIZER_MODEL_NAME)
29
+ translation_tokenizer = MarianTokenizer.from_pretrained(TRANSLATION_MODEL_NAME)
30
+ translation_model = MarianMTModel.from_pretrained(TRANSLATION_MODEL_NAME)
31
+
32
  def save_cookies(file):
33
  if file:
34
  shutil.copy(file.name, COOKIES_PATH)
 
36
  return "⚠️ Please upload a valid cookies.txt file."
37
 
def download_audio(youtube_url):
    """Download the best available audio stream of a YouTube video.

    The audio is written to a uniquely named file in the system temp
    directory. The caller owns the file and is responsible for deleting it.

    Args:
        youtube_url: URL of the video whose audio should be downloaded.

    Returns:
        Path of the downloaded audio file.
    """
    # Reserve only a unique *name*; do not create the file up front.
    # By default yt-dlp does not overwrite an existing media file and reports
    # it as already downloaded, so a pre-created (empty) temp file would be
    # handed to the transcriber untouched. Not creating it also avoids leaking
    # an open file handle.
    output_path = os.path.join(
        tempfile.gettempdir(), f"audio_{uuid.uuid4().hex}.webm"
    )
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path,
        'quiet': True,
    }
    # Use the uploaded cookies (if any) for videos that need authentication.
    if os.path.exists(COOKIES_PATH):
        ydl_opts['cookiefile'] = COOKIES_PATH
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
    return output_path
50
 
def get_thumbnail(youtube_url):
    """Look up the thumbnail URL of a YouTube video without downloading it.

    Args:
        youtube_url: URL of the video to inspect.

    Returns:
        The value of the ``"thumbnail"`` field of the extracted metadata, or
        an empty string when the field is absent or the lookup fails (the
        failure is logged).
    """
    options = {'quiet': True}
    try:
        # Reuse the uploaded cookies file when one is present.
        if os.path.exists(COOKIES_PATH):
            options['cookiefile'] = COOKIES_PATH
        with yt_dlp.YoutubeDL(options) as downloader:
            metadata = downloader.extract_info(youtube_url, download=False)
            thumbnail = metadata.get("thumbnail", "")
    except Exception as e:
        # Best effort: a missing thumbnail must not fail the whole request.
        logger.error(f"Thumbnail fetch error: {e}")
        thumbnail = ""
    return thumbnail
62
 
63
  def translate_to_english(text):
64
  chunks = [text[i:i+500] for i in range(0, len(text), 500)]
 
70
  return " ".join(translated)
71
 
def process_video(url):
    """Transcribe, translate and summarize the audio of a YouTube video.

    Args:
        url: URL of the video to process.

    Returns:
        A 4-tuple ``(transcription, translated_text, summary, thumbnail_url)``.
        If any step fails, the first element is an error message and the
        remaining three are empty strings.
    """
    audio_path = None
    try:
        audio_path = download_audio(url)
        result = whisper_model.transcribe(audio_path)
        transcription = result["text"]

        translated_text = translate_to_english(transcription)
        summary = summarizer(translated_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        thumbnail_url = get_thumbnail(url)

        return transcription, translated_text, summary, thumbnail_url
    except Exception as e:
        logger.exception("Error processing video")
        return f"❌ Error: {str(e)}", "", "", ""
    finally:
        # Clean up the temp audio file on failure as well as on success, and
        # never let a cleanup problem replace an otherwise valid result.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                pass
            except OSError:
                logger.warning("Could not remove temporary audio file %s", audio_path)
88
 
def download_summary(text):
    """Write the summary text to a uniquely named file for download.

    Args:
        text: Summary text to save. ``None`` is treated as an empty string so
            that requesting a download before a summary exists still yields a
            (blank) file instead of raising ``TypeError``.

    Returns:
        Path of the UTF-8 encoded ``summary_<hex>.txt`` file created in the
        system temp directory.
    """
    filename = os.path.join(tempfile.gettempdir(), f"summary_{uuid.uuid4().hex}.txt")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text if text is not None else "")
    return filename
94
 
95
+ # Gradio UI
96
  with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Summarizer") as demo:
97
  gr.Markdown("## 🧠 Multilingual YouTube Summarizer")
98
  gr.Markdown("Upload a video link and get the transcript, English translation, and summary.")
 
123
  outputs=[transcript_output, translation_output, summary_output, video_thumb]
124
  )
125
 
126
+ dow