import os
import math
import tempfile
from pydub import AudioSegment
from langchain.schema import Document
from openai import OpenAI
from moviepy import VideoFileClip  # moviepy >= 2.0; on 1.x use `from moviepy.editor import VideoFileClip`
from dotenv import load_dotenv
load_dotenv()

def get_langchain_Document_for_rag(video_path):
    """Extract the audio track from a video, translate it to English with
    Whisper in 5-minute chunks, and return one LangChain Document per chunk."""
    # Extract audio from video file: reserve a temp path, close the handle,
    # then write to the path (writing to an open temp file can fail on Windows)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        temp_audio_path = temp_audio_file.name
    video_clip = VideoFileClip(video_path)
    video_clip.audio.write_audiofile(temp_audio_path, logger=None)
    video_clip.close()

    # Instantiate llm client
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Load extracted audio
    audio = AudioSegment.from_file(temp_audio_path)

    # Chunk audio for translation (OpenAI's audio endpoints cap uploads at
    # 25 MB, so long recordings must be split before upload)
    translations = []
    chunk_duration_ms = 5 * 60 * 1000  # 5 minutes per chunk
    num_chunks = math.ceil(len(audio) / chunk_duration_ms)
    for i in range(num_chunks):
        start = i * chunk_duration_ms
        end = min((i + 1) * chunk_duration_ms, len(audio))
        chunk = audio[start:end]

        # Reserve a temp path, close the handle, then export the chunk to it
        # (exporting to an open NamedTemporaryFile can fail on Windows)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as chunk_file:
            chunk_path = chunk_file.name
        chunk.export(chunk_path, format="mp3")

        with open(chunk_path, "rb") as f:
            translation = client.audio.translations.create(
                model="whisper-1",  # or use your preferred model
                file=f,
            )

        translations.append({
            'chunk_id': i,
            'start_time': start,
            'end_time': end,
            'transcript': translation.text,
        })

        os.unlink(chunk_path)  # clean up chunk file

    os.unlink(temp_audio_path)  # clean up extracted audio file

    # Create LangChain documents
    langchain_documents = []
    for data in translations:
        content = f"Transcript: {data['transcript']}"
        doc = Document(
            page_content=content,
            metadata={
                "start_time": data['start_time'],
                "end_time": data['end_time'],
                "chunk_id": data['chunk_id']
            }
        )
        langchain_documents.append(doc)

    return langchain_documents
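

# Example usage: a minimal sketch, assuming OPENAI_API_KEY is set in the
# environment; "sample_video.mp4" is a hypothetical path, not part of this repo
if __name__ == "__main__":
    docs = get_langchain_Document_for_rag("sample_video.mp4")
    for doc in docs:
        start_s = doc.metadata["start_time"] / 1000  # metadata times are in ms
        print(f"[chunk {doc.metadata['chunk_id']} @ {start_s:.0f}s] {doc.page_content[:80]}")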