GTimothee committed on
Commit
a54a14b
·
1 Parent(s): f2260e2

use my own gradio ui

Browse files
Files changed (2) hide show
  1. app.py +33 -20
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,34 +1,47 @@
 
1
  import os
2
- from smolagents import CodeAgent
 
3
  from dotenv import load_dotenv
4
- from smolagents import HfApiModel
5
- from smolagents import Tool
6
- import gradio as gr
7
- from smolagents import GradioUI
8
 
9
  load_dotenv()
10
 
11
- model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
12
 
13
- def get_tools():
14
- hf_speech2text_tool = Tool.from_hub(
15
- "GTimothee/hf_text2speech_tool",
16
- token=os.getenv('HF_TOKEN'),
17
- trust_remote_code=True
18
- )
 
 
19
 
20
- hf_text2speech_tool = Tool.from_hub(
21
- "GTimothee/kokoro_text2speech_tool",
22
- token=os.getenv('HF_TOKEN'),
23
- trust_remote_code=True
 
 
 
24
  )
 
 
 
 
25
  add_base_tools = True
26
- tools_list = [hf_speech2text_tool, hf_text2speech_tool]
27
  return tools_list, add_base_tools
28
 
29
 
30
  if __name__ == "__main__":
31
  tools_list, add_base_tools = get_tools()
32
- model = HfApiModel(model_id, provider=None)
33
- agent = CodeAgent(tools=tools_list, model=model, add_base_tools=add_base_tools, additional_authorized_imports=['web_search'])
34
- GradioUI(agent).launch()
 
 
 
 
 
 
1
+ import io
2
  import os
3
+
4
+ import soundfile as sf
5
  from dotenv import load_dotenv
6
+ from huggingface_hub import InferenceClient
7
+ from smolagents import CodeAgent, GradioUI, HfApiModel
 
 
8
 
9
  load_dotenv()
10
 
 
11
 
12
def convert_data_to_audio_filelike(your_input_tuple):
    """Encode a ``(sample_rate, samples)`` pair as an in-memory WAV file.

    Args:
        your_input_tuple: Two-item sequence unpacked as the sample rate
            followed by the audio samples (the original docstring describes
            the samples as an ``np.ndarray``).

    Returns:
        An ``io.BytesIO`` holding the WAV data, rewound to position 0 so the
        caller can read it from the start.
    """
    rate, samples = your_input_tuple
    wav_stream = io.BytesIO()
    # The container format is named explicitly: an in-memory stream has no
    # file extension to take it from.
    sf.write(wav_stream, samples, rate, format="WAV")
    wav_stream.seek(0)
    return wav_stream
19
+
20
 
21
def speech2text_func(data, model: str = "openai/whisper-small.en") -> str:
    """Transcribe audio with a hosted automatic-speech-recognition model.

    Args:
        data: Audio to transcribe. A tuple is treated as a
            ``(sample_rate, samples)`` pair and encoded to WAV bytes first;
            any other value is forwarded to the client unchanged.
        model: Identifier of the speech-recognition model to query.

    Returns:
        The ``text`` attribute of the recognition result.
    """
    audio_payload = data
    if isinstance(audio_payload, tuple):
        # Raw samples are encoded to WAV and sent as bytes.
        audio_payload = convert_data_to_audio_filelike(audio_payload).read()

    # The API key is read from the HF_TOKEN environment variable on each call.
    asr_client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    transcription = asr_client.automatic_speech_recognition(
        audio_payload, model=model
    )
    return transcription.text
30
+
31
+
32
def get_tools():
    """Return the tool configuration used to build the agent.

    Returns:
        A ``(tools_list, add_base_tools)`` pair: an empty list of custom
        tools and ``True`` for the base-tools flag.
    """
    # No custom tools are registered; a fresh list is returned on every call.
    return [], True
36
 
37
 
38
  if __name__ == "__main__":
39
  tools_list, add_base_tools = get_tools()
40
+ model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", provider=None)
41
+ agent = CodeAgent(
42
+ tools=tools_list,
43
+ model=model,
44
+ add_base_tools=add_base_tools,
45
+ additional_authorized_imports=["web_search"],
46
+ )
47
+ GradioUI(agent).launch(speech2text_func=speech2text_func)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ git+https://github.com/GTimothee/smolagents.git@add_audio_input#egg=smolagents
2
+ python-dotenv
3
+ gradio
4
+ soundfile