Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,7 @@ import requests
|
|
4 |
import pandas as pd
|
5 |
from io import BytesIO
|
6 |
import re
|
7 |
-
import
|
8 |
-
import ffmpeg
|
9 |
|
10 |
# --- Tool-specific Imports ---
|
11 |
from pytube import YouTube
|
@@ -55,10 +54,17 @@ def transcribe_youtube_video(video_url: str) -> str:
|
|
55 |
stream = yt.streams.filter(only_audio=True).first()
|
56 |
video_path = stream.download(output_path=TEMP_DIR)
|
57 |
audio_path = os.path.join(TEMP_DIR, "output.mp3")
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
63 |
with open(audio_path, "rb") as audio_file:
|
64 |
transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
|
@@ -66,8 +72,10 @@ def transcribe_youtube_video(video_url: str) -> str:
|
|
66 |
except Exception as e:
|
67 |
return f"Error during YouTube transcription: {e}"
|
68 |
finally:
|
|
|
69 |
if video_path and os.path.exists(video_path): os.remove(video_path)
|
70 |
if audio_path and os.path.exists(audio_path): os.remove(audio_path)
|
|
|
71 |
|
72 |
# --- Agent Definition ---
|
73 |
class LangChainAgent:
|
@@ -161,7 +169,7 @@ with gr.Blocks() as demo:
|
|
161 |
|
162 |
if __name__ == "__main__":
|
163 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
164 |
-
for key in ["GROQ_API_KEY", "TAVILY_API_KEY"
|
165 |
print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
|
166 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
167 |
-
demo.launch(debug=True, share=False)
|
|
|
4 |
import pandas as pd
|
5 |
from io import BytesIO
|
6 |
import re
|
7 |
+
import ffmpeg # Using the ffmpeg-python wrapper
|
|
|
8 |
|
9 |
# --- Tool-specific Imports ---
|
10 |
from pytube import YouTube
|
|
|
54 |
stream = yt.streams.filter(only_audio=True).first()
|
55 |
video_path = stream.download(output_path=TEMP_DIR)
|
56 |
audio_path = os.path.join(TEMP_DIR, "output.mp3")
|
57 |
+
|
58 |
+
# Use ffmpeg-python to convert the downloaded file to mp3
|
59 |
+
(
|
60 |
+
ffmpeg
|
61 |
+
.input(video_path)
|
62 |
+
.output(audio_path, **{'q:a': 0, 'map': 'a'}) # Set audio quality and select audio stream
|
63 |
+
.overwrite_output() # Corresponds to the -y flag
|
64 |
+
.run(quiet=True) # Use quiet=True to avoid printing ffmpeg logs
|
65 |
+
)
|
66 |
+
|
67 |
+
print(f"Audio extracted to: {audio_path}")
|
68 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
69 |
with open(audio_path, "rb") as audio_file:
|
70 |
transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
|
|
|
72 |
except Exception as e:
|
73 |
return f"Error during YouTube transcription: {e}"
|
74 |
finally:
|
75 |
+
# Clean up temporary files
|
76 |
if video_path and os.path.exists(video_path): os.remove(video_path)
|
77 |
if audio_path and os.path.exists(audio_path): os.remove(audio_path)
|
78 |
+
print("Cleaned up temporary files.")
|
79 |
|
80 |
# --- Agent Definition ---
|
81 |
class LangChainAgent:
|
|
|
169 |
|
170 |
if __name__ == "__main__":
|
171 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
172 |
+
for key in ["GROQ_API_KEY", "TAVILY_API_KEY"]:
|
173 |
print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
|
174 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
175 |
+
demo.launch(debug=True, share=False)
|