httpdaniel committed on
Commit
b13d50e
·
1 Parent(s): 1802844

Adding summariser

Browse files
Files changed (2) hide show
  1. app.py +106 -4
  2. requirements.txt +6 -0
app.py CHANGED
@@ -1,7 +1,109 @@
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ from urllib.parse import urlparse, parse_qs
3
+ from pytube import YouTube
4
+ from huggingface_hub import InferenceClient
5
  import gradio as gr
6
 
7
# Hugging Face Hub id of the chat model that writes the summaries.
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Module-level inference client bound to that model; summarise_transcript
# sends its chat-completion request through it.
client = InferenceClient(model=model_name)
9
 
10
+
11
def transcribe_video(url):
    """Produce video info, transcript and summary for a YouTube link.

    Args:
        url: Link to a YouTube video, as entered in the UI textbox.

    Returns:
        A 3-tuple of strings ``(video_info, transcript, summary)``, one value
        per output component bound to the button. If no video id can be
        extracted from ``url``, the first element holds an explanatory
        message and the other two are empty strings.
    """
    video_id = parse_youtube_url(url)
    if not video_id:
        # The click handler is wired to three outputs, so a bare ``None``
        # does not supply one value per component. Return the same shape as
        # the success path, with a message the user can act on.
        return (
            "Could not find a YouTube video id in the provided link.",
            "",
            "",
        )

    video_metadata = get_video_metadata(video_id)
    transcript_content = get_transcript_content(video_id)
    transcript_summary = summarise_transcript(transcript_content)

    video_info = (
        f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}"
    )
    return video_info, transcript_content, transcript_summary
24
+
25
+
26
def parse_youtube_url(url):
    """Extract the video id from a YouTube link.

    Supported forms:
        * any URL carrying a ``v`` query parameter
          (``https://www.youtube.com/watch?v=<id>``),
        * short links (``https://youtu.be/<id>``),
        * path-style links on youtube.com / youtube-nocookie.com:
          ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``.

    Args:
        url: The link to parse. Surrounding whitespace is ignored.

    Returns:
        The video id as a string, or ``None`` if ``url`` is empty, is not a
        string, or does not match any supported form.
    """
    if not url or not isinstance(url, str):
        return None

    parsed_url = urlparse(url.strip())

    # A ``v`` query parameter wins regardless of host, as before.
    video_id = parse_qs(parsed_url.query).get("v")
    if video_id:
        return video_id[0]

    host = (parsed_url.hostname or "").lower()
    segments = [segment for segment in parsed_url.path.split("/") if segment]

    # Short links carry the id as the first path segment.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    on_youtube = host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    )
    # Path-style links: /<kind>/<id>
    if on_youtube and len(segments) >= 2:
        if segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]

    return None
32
+
33
+
34
def get_video_metadata(video_id):
    """Look up a video's title and author through pytube.

    Args:
        video_id: YouTube video id used to build the watch URL.

    Returns:
        A dict with keys ``"title"`` and ``"author"``; each falls back to
        ``"Unknown"`` when pytube reports a falsy value.
    """
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    video = YouTube(watch_url)
    return {
        "title": video.title or "Unknown",
        "author": video.author or "Unknown",
    }
42
+
43
+
44
def get_transcript_content(video_id):
    """Fetch a video's transcript and flatten it into one string.

    Args:
        video_id: YouTube video id passed to ``YouTubeTranscriptApi``.

    Returns:
        The transcript text as produced by ``parse_transcript``.
    """
    raw_pieces = YouTubeTranscriptApi.get_transcript(video_id)
    return parse_transcript(raw_pieces)
49
+
50
+
51
def parse_transcript(transcript):
    """Join the text of every transcript piece into a single string.

    Args:
        transcript: Iterable of mappings, each with a ``"text"`` entry.

    Returns:
        The pieces' texts, each stripped of leading/trailing spaces (spaces
        only, other whitespace is kept), separated by one space.
    """
    trimmed_texts = [piece["text"].strip(" ") for piece in transcript]
    return " ".join(trimmed_texts)
59
+
60
+
61
def summarise_transcript(transcript_content):
    """Ask the chat model for a summary of a transcript.

    Args:
        transcript_content: Full transcript text to summarise.

    Returns:
        The content of the first completion choice, stripped of surrounding
        whitespace.
    """
    request_text = f"""Provide a summary of the following video transcription in 150-350 words, focusing on the key points and core ideas discussed: {transcript_content}"""

    # Single-turn conversation: the whole request goes in one user message.
    response = client.chat_completion(
        messages=[{"role": "user", "content": request_text}],
        max_tokens=2048,
        temperature=0.1,
    )

    first_choice = response.choices[0]
    return first_choice.message["content"].strip()
73
+
74
+
75
# Page layout: a header, then one row whose first column holds the link input
# and action button and whose second column shows the three result panels.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("<H1>YouTube Transcriber</H1>")
    gr.Markdown(
        "<H3>Provide a link to a YouTube video and get a transcription and summary</H3>"
    )
    gr.Markdown(
        "<H6>This project uses the youtube_transcript_api to fetch a transcript from a YouTube link, pytube to get video metadata, and Mixtral 8x7B to generate a summary.</H6>"
    )

    with gr.Row():
        with gr.Column(scale=1):
            video_link = gr.Textbox(
                label="Link to video",
                value="https://www.youtube.com/watch?v=ZIyB9e_7a4c",
            )
            transcribe_btn = gr.Button(
                value="Transcribe & Summarise ⚡️", variant="primary"
            )

        with gr.Column(scale=5):
            video_info = gr.Textbox(label="Video Info")
            transcription = gr.TextArea(
                label="Transcription", scale=1, lines=12, max_lines=12
            )
            transcription_summary = gr.TextArea(
                label="Summary", scale=1, lines=12, max_lines=12
            )

    # transcribe_video must supply one value per component listed in outputs.
    transcribe_btn.click(
        fn=transcribe_video,
        inputs=video_link,
        outputs=[video_info, transcription, transcription_summary],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ gradio_client
3
+ huggingface-hub
4
+ pytube
5
+ urllib3
6
+ youtube-transcript-api