httpdaniel committed on
Commit
c4ce778
·
1 Parent(s): fd2d49f

trying langchain transcriber

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -3,16 +3,27 @@ from urllib.parse import urlparse, parse_qs
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
 
6
 
7
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
8
  client = InferenceClient(model=model_name)
9
 
10
 
 
 
 
 
 
 
 
 
 
11
  def transcribe_video(url):
12
  video_id = parse_youtube_url(url)
13
  if video_id:
14
  video_metadata = get_video_metadata(video_id)
15
- transcript_content = get_transcript_content(video_id)
 
16
  transcript_summary = summarise_transcript(transcript_content)
17
  return (
18
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
 
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
6
+ from langchain_community.document_loaders import YoutubeLoader
7
 
8
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
9
  client = InferenceClient(model=model_name)
10
 
11
 
12
def langhchain_summary(link):
    """Return the text loaded from a YouTube link as one string.

    A ``YoutubeLoader`` is built from ``link`` with ``add_video_info``
    disabled, its documents are loaded, and the ``page_content`` of each
    document is joined with single spaces.

    Note: despite the name, this returns the joined document text; no
    summarisation happens in this function.
    """
    youtube_loader = YoutubeLoader.from_youtube_url(link, add_video_info=False)

    # Collect the text of every loaded document in order.
    pieces = []
    for document in youtube_loader.load():
        pieces.append(document.page_content)

    return " ".join(pieces)
19
+
20
+
21
  def transcribe_video(url):
22
  video_id = parse_youtube_url(url)
23
  if video_id:
24
  video_metadata = get_video_metadata(video_id)
25
+ # transcript_content = get_transcript_content(video_id)
26
+ transcript_content = langhchain_summary(url)
27
  transcript_summary = summarise_transcript(transcript_content)
28
  return (
29
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",