Spaces:

httpdaniel
/

YouTubeTranscriber

Sleeping

App Files Files Community

httpdaniel commited on Oct 5, 2024

Commit

b13d50e

1 Parent(s): 1802844

Adding summariser

Browse files

Files changed (2) hide show

app.py +106 -4
requirements.txt +6 -0

app.py CHANGED Viewed

@@ -1,7 +1,109 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+from youtube_transcript_api import YouTubeTranscriptApi
+from urllib.parse import urlparse, parse_qs
+from pytube import YouTube
+from huggingface_hub import InferenceClient
 import gradio as gr
+model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+client = InferenceClient(model=model_name)
+def transcribe_video(url):
+    video_id = parse_youtube_url(url)
+    if video_id:
+        video_metadata = get_video_metadata(video_id)
+        transcript_content = get_transcript_content(video_id)
+        transcript_summary = summarise_transcript(transcript_content)
+        return (
+            f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
+            transcript_content,
+            transcript_summary,
+        )
+    else:
+        return None
+def parse_youtube_url(url):
+    parsed_url = urlparse(url)
+    video_id = parse_qs(parsed_url.query).get("v")
+    if video_id:
+        return video_id[0]
+    return None
+def get_video_metadata(video_id):
+    yt = YouTube(f"https://www.youtube.com/watch?v={video_id}")
+    title = yt.title or "Unknown"
+    author = yt.author or "Unknown"
+    metadata = {"title": title, "author": author}
+    return metadata
+def get_transcript_content(video_id):
+    transcript = YouTubeTranscriptApi.get_transcript(video_id)
+    transcript_content = parse_transcript(transcript)
+    return transcript_content
+def parse_transcript(transcript):
+    content = " ".join(
+        map(
+            lambda transcript_piece: transcript_piece["text"].strip(" "),
+            transcript,
+        )
+    )
+    return content
+def summarise_transcript(transcript_content):
+    prompt = f"""Provide a summary of the following video transcription in 150-350 words, focusing on the key points and core ideas discussed: {transcript_content}"""
+    message = [{"role": "user", "content": prompt}]
+    result = client.chat_completion(
+        messages=message,
+        max_tokens=2048,
+        temperature=0.1,
+    )
+    return result.choices[0].message["content"].strip()
+with gr.Blocks(theme=gr.themes.Base()) as demo:
+    gr.Markdown("<H1>YoutTube Transcriber</H1>")
+    gr.Markdown(
+        "<H3>Provide a link to a YouTube video and get a transcription and summary</H3>"
+    )
+    gr.Markdown(
+        "<H6>This project uses the youtube_transcript_api to fetch a transcript from a YouTube link, pytube to get video metadata, and Mistral 7B to generate a summary.</H6>"
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            video_link = gr.Textbox(
+                label="Link to video",
+                value="https://www.youtube.com/watch?v=ZIyB9e_7a4c",
+            )
+            transcribe_btn = gr.Button(
+                value="Transcribe & Summarise ⚡️", variant="primary"
+            )
+        with gr.Column(scale=5):
+            video_info = gr.Textbox(label="Video Info")
+            transcription = gr.TextArea(
+                label="Transcription", scale=1, lines=12, max_lines=12
+            )
+            transcription_summary = gr.TextArea(
+                label="Summary", scale=1, lines=12, max_lines=12
+            )
+    transcribe_btn.click(
+        fn=transcribe_video,
+        inputs=video_link,
+        outputs=[video_info, transcription, transcription_summary],
+    )
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+gradio_client
+huggingface-hub
+pytube
+urllib3
+youtube-transcript-api