httpdaniel committed on
Commit
b22e0b7
·
1 Parent(s): 690d8c4

Adding language and error handling

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -1,29 +1,20 @@
1
- from youtube_transcript_api import YouTubeTranscriptApi
2
  from urllib.parse import urlparse, parse_qs
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
6
  from langchain_community.document_loaders import YoutubeLoader
 
7
 
8
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
9
  client = InferenceClient(model=model_name)
10
 
11
 
12
- def langhchain_summary(link):
13
- loader = YoutubeLoader.from_youtube_url(link, add_video_info=False)
14
-
15
- documents = loader.load()
16
-
17
- transcription = " ".join([doc.page_content for doc in documents])
18
- return transcription
19
-
20
-
21
  def transcribe_video(url):
22
  video_id = parse_youtube_url(url)
23
  if video_id:
24
  video_metadata = get_video_metadata(video_id)
25
- # transcript_content = get_transcript_content(video_id)
26
- transcript_content = langhchain_summary(url)
27
  transcript_summary = summarise_transcript(transcript_content)
28
  return (
29
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
@@ -53,12 +44,28 @@ def get_video_metadata(video_id):
53
 
54
 
55
  def get_transcript_content(video_id):
 
 
 
 
 
 
 
56
  try:
57
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
58
- transcript_content = parse_transcript(transcript)
59
- return transcript_content
60
- except Exception as e:
61
- raise e
 
 
 
 
 
 
 
 
 
62
 
63
 
64
  def parse_transcript(transcript):
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
2
  from urllib.parse import urlparse, parse_qs
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
6
  from langchain_community.document_loaders import YoutubeLoader
7
+ from typing import Any, Dict, List
8
 
9
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
10
  client = InferenceClient(model=model_name)
11
 
12
 
 
 
 
 
 
 
 
 
 
13
  def transcribe_video(url):
14
  video_id = parse_youtube_url(url)
15
  if video_id:
16
  video_metadata = get_video_metadata(video_id)
17
+ transcript_content = get_transcript_content(video_id)
 
18
  transcript_summary = summarise_transcript(transcript_content)
19
  return (
20
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
 
44
 
45
 
46
  def get_transcript_content(video_id):
47
+ # try:
48
+ # transcript = YouTubeTranscriptApi.get_transcript(video_id)
49
+ # transcript_content = parse_transcript(transcript)
50
+ # return transcript_content
51
+ # except Exception as e:
52
+ # raise e
53
+
54
  try:
55
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
56
+ except TranscriptsDisabled:
57
+ return []
58
+
59
+ transcript = transcript_list.find_transcript(["en"])
60
+ transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
61
+
62
+ transcript = " ".join(
63
+ map(
64
+ lambda transcript_piece: transcript_piece["text"].strip(" "),
65
+ transcript_pieces,
66
+ )
67
+ )
68
+ return transcript
69
 
70
 
71
  def parse_transcript(transcript):