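"""Gradio UI for live and offline meeting transcription with hybrid search.

Live Mode streams microphone audio through a Whisper-based recognizer with
speaker diarization; Offline Mode runs the same pipeline on an uploaded
audio file. In both tabs, a chat box runs a hybrid vector search over the
accumulated transcript.
"""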
import os
import subprocess
import sys

from huggingface_hub import login

# Make sure the spaCy English model is installed; fetch it on first run.
try:
    import en_core_web_sm  # noqa: F401
except ImportError:
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )

# Authenticate with the Hugging Face Hub. Skip when no token is configured,
# so local runs without HF_TOKEN still start instead of prompting.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)


# Imported after authentication: these modules may load models from the Hub
# at import time.
import gradio as gr
from vector_search import HybridVectorSearch
from whisper_asr import WhisperAutomaticSpeechRecognizer

with gr.Blocks() as demo:
    with gr.Tab("Live Mode"):
        full_stream = gr.State()
        transcript = gr.State(value="")
        chats = gr.State(value=[])

        with gr.Row(variant="panel"):
            audio_input = gr.Audio(sources=["microphone"], streaming=True)
        with gr.Row(variant="panel", equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    bubble_full_width=True, height="65vh", show_copy_button=True
                )
                chat_input = gr.Textbox(
                    interactive=True, placeholder="Type search query..."
                )
            with gr.Column(scale=1):
                transcript_textbox = gr.Textbox(
                    lines=40,
                    placeholder="Transcript",
                    max_lines=40,
                    label="Transcript",
                    show_label=True,
                    autoscroll=True,
                )

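        # Wire events: submitting the chat box runs a hybrid vector search
        # over the transcript; each streamed audio chunk is transcribed with
        # diarization and appended to the transcript.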
        chat_input.submit(
            HybridVectorSearch.chat_search, [chat_input, chatbot], [chat_input, chatbot]
        )
        audio_input.stream(
            WhisperAutomaticSpeechRecognizer.transcribe_with_diarization,
            [audio_input, full_stream, transcript],
            [transcript_textbox, full_stream, transcript],
        )

    with gr.Tab("Offline Mode"):
        full_stream = gr.State()
        transcript = gr.State(value="")
        chats = gr.State(value=[])

        with gr.Row(variant="panel"):
            audio_input = gr.Audio(sources=["upload"], type="filepath")
        with gr.Row(variant="panel", equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    bubble_full_width=True, height="55vh", show_copy_button=True
                )
                chat_input = gr.Textbox(
                    interactive=True, placeholder="Type search query..."
                )
            with gr.Column(scale=1):
                transcript_textbox = gr.Textbox(
                    lines=35,
                    placeholder="Transcripts",
                    max_lines=35,
                    label="Transcript",
                    show_label=True,
                    autoscroll=True,
                )

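        # Wire events: chat search over the transcript, and full-file
        # transcription with diarization when an audio file is uploaded.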
        chat_input.submit(
            HybridVectorSearch.chat_search, [chat_input, chatbot], [chat_input, chatbot]
        )
        audio_input.upload(
            WhisperAutomaticSpeechRecognizer.transcribe_with_diarization_file,
            [audio_input],
            [transcript_textbox, full_stream, transcript],
        )

if __name__ == "__main__":
    demo.launch()
    # For container or remote deployments, bind to all interfaces instead:
    # demo.launch(server_name="0.0.0.0", server_port=7860, share=True)