mchinea committed on
Commit
e71f323
·
1 Parent(s): f8e3605

add new tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. tools.py +32 -1
requirements.txt CHANGED
@@ -13,3 +13,4 @@ pydub
13
  tavily-python
14
  wikipedia
15
  pytesseract
 
 
13
  tavily-python
14
  wikipedia
15
  pytesseract
16
+ openai-whisper
tools.py CHANGED
@@ -8,6 +8,7 @@ from typing import Dict
8
  from pathlib import Path
9
  #from markitdown import MarkItDown
10
  from urllib.parse import urlparse
 
11
 
12
  from langchain_core.tools import tool
13
 
@@ -364,6 +365,35 @@ def extract_text_from_image(image_path: str) -> str:
364
  return f"Unexpected error during OCR: {str(e)}"
365
 
366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  level1_tools = [
368
  multiply,
369
  add,
@@ -379,5 +409,6 @@ level1_tools = [
379
  download_file_from_url,
380
  save_and_read_file,
381
  read_python_file,
382
- extract_text_from_image
 
383
  ]
 
8
  from pathlib import Path
9
  #from markitdown import MarkItDown
10
  from urllib.parse import urlparse
11
+ from smolagents import Tool
12
 
13
  from langchain_core.tools import tool
14
 
 
365
  return f"Unexpected error during OCR: {str(e)}"
366
 
367
 
368
# Loaded Whisper models keyed by model name, so the weights are read from
# disk (or downloaded) only once per process instead of on every tool call.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(model_name: str = "base"):
    """Return a cached Whisper model, loading it on first use.

    The import is done lazily so that a missing 'openai-whisper' install
    surfaces as an ImportError to the caller rather than at module import.
    """
    import whisper

    model = _WHISPER_MODEL_CACHE.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODEL_CACHE[model_name] = model
    return model


@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribes speech from an audio file using Whisper (local).

    Args:
        audio_path: Path to the audio file (e.g., .mp3, .wav, .m4a).

    Returns:
        The transcribed text or an error message.
    """
    try:
        # isfile (not exists) so that a directory path is rejected up front
        # instead of failing later inside the transcription call.
        if not os.path.isfile(audio_path):
            return f"Error: File not found at '{audio_path}'."

        # "base" is the default size; "small", "medium", "large" also exist.
        model = _get_whisper_model("base")
        result = model.transcribe(audio_path)

        return result["text"].strip()
    except ImportError:
        return (
            "Error: 'whisper' library is not installed. "
            "Install it using 'pip install openai-whisper'."
        )
    except Exception as e:
        # Errors are returned as text rather than raised, so the calling
        # agent always receives a string result from the tool.
        return f"Error during transcription: {str(e)}"
396
+
397
  level1_tools = [
398
  multiply,
399
  add,
 
409
  download_file_from_url,
410
  save_and_read_file,
411
  read_python_file,
412
+ extract_text_from_image,
413
+ transcribe_audio
414
  ]