Spaces:
Sleeping
Sleeping
httpdaniel
committed on
Commit
·
c4ce778
1
Parent(s):
fd2d49f
trying langchain transcriber
Browse files
app.py
CHANGED
@@ -3,16 +3,27 @@ from urllib.parse import urlparse, parse_qs
|
|
3 |
from pytube import YouTube
|
4 |
from huggingface_hub import InferenceClient
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
8 |
client = InferenceClient(model=model_name)
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def transcribe_video(url):
|
12 |
video_id = parse_youtube_url(url)
|
13 |
if video_id:
|
14 |
video_metadata = get_video_metadata(video_id)
|
15 |
-
transcript_content = get_transcript_content(video_id)
|
|
|
16 |
transcript_summary = summarise_transcript(transcript_content)
|
17 |
return (
|
18 |
f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
|
|
|
3 |
from pytube import YouTube
|
4 |
from huggingface_hub import InferenceClient
|
5 |
import gradio as gr
|
6 |
+
from langchain_community.document_loaders import YoutubeLoader
|
7 |
|
8 |
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
9 |
client = InferenceClient(model=model_name)
|
10 |
|
11 |
|
12 |
+
def langhchain_summary(link):
    """Return the text of a YouTube video as one space-joined string.

    Despite the name, nothing is summarised here: the function builds a
    ``YoutubeLoader`` for ``link`` (with ``add_video_info=False``), loads
    its documents, and concatenates each document's ``page_content``.

    Args:
        link: YouTube URL handed to ``YoutubeLoader.from_youtube_url``.

    Returns:
        The ``page_content`` of every loaded document, joined by single
        spaces (presumably the video transcript -- confirm against the
        loader's documentation).
    """
    docs = YoutubeLoader.from_youtube_url(link, add_video_info=False).load()
    return " ".join(doc.page_content for doc in docs)
|
19 |
+
|
20 |
+
|
21 |
def transcribe_video(url):
|
22 |
video_id = parse_youtube_url(url)
|
23 |
if video_id:
|
24 |
video_metadata = get_video_metadata(video_id)
|
25 |
+
# transcript_content = get_transcript_content(video_id)
|
26 |
+
transcript_content = langhchain_summary(url)
|
27 |
transcript_summary = summarise_transcript(transcript_content)
|
28 |
return (
|
29 |
f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
|