File size: 828 Bytes
6530149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from youtube_transcript_api import YouTubeTranscriptApi
import re

def get_youtube_video_id(query):
    """Extract the 11-character YouTube video id from a URL inside *query*.

    The pattern recognises ``youtu.be/<id>`` links and ``youtube.com`` links
    of the form ``watch?v=<id>``, ``embed/<id>``, ``v/<id>`` and
    ``shorts/<id>``.

    Args:
        query: Text that may contain a YouTube URL.

    Returns:
        The video id as a string, or ``None`` when *query* is not searchable
        text or contains no recognised YouTube URL. In the ``None`` case a
        "did not find" message is printed.
    """
    try:
        match = re.search(
            r'(?:youtu\.be/|youtube\.com/(?:watch\?v=|embed/|v/|shorts/))([\w-]{11})',
            query,
        )
    except TypeError:
        # re.search raises TypeError for non-text input (e.g. None or bytes);
        # treat that the same as "no URL found" instead of crashing.
        match = None

    if match is None:
        # Report the miss here: a failed search returns None rather than
        # raising, so an except-only message would never be shown.
        print("Did not find youtube video id from query ", query)
        return None

    video_id = match.group(1)
    print(video_id)
    return video_id

def fetch_transcript_english(video_id):
    """Fetch the English transcript for a YouTube video.

    Args:
        video_id: Video id handed to ``YouTubeTranscriptApi.fetch``.

    Returns:
        The value returned by ``fetch(video_id, languages=['en'])``, or
        ``None`` if creating the client or fetching raised an exception
        (the failure is printed, not re-raised).
    """
    try:
        ytt_api = YouTubeTranscriptApi()
        return ytt_api.fetch(video_id, languages=['en'])
    except Exception as exc:
        # Best-effort boundary: keep returning None on failure, but do not
        # trap KeyboardInterrupt/SystemExit and do surface the actual cause.
        print(f"Error fetching transcript for {video_id!r}: {exc}")
        return None

def post_process_transcript(transcript_snippets):
    """Return the ``text`` of every snippet joined by single spaces.

    Args:
        transcript_snippets: Iterable of objects exposing a ``text`` attribute.

    Returns:
        One string containing each snippet's text in iteration order,
        separated by a single space (empty string for no snippets).
    """
    snippet_texts = (snippet.text for snippet in transcript_snippets)
    return " ".join(snippet_texts)