amkj84 committed on
Commit
0455eb3
·
verified ·
1 Parent(s): 337d05a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py CHANGED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pynput import keyboard
3
+ import transformers
4
+ from transformers import WhisperFeatureExtractor, WhisperForConditionalGeneration
5
+ import soundfile as sf
6
+
7
# Default voice, speed, and pitch. The Streamlit widgets further down in this
# script overwrite these three names on every run.
voice = "en"  # English (change for other voices)
speed = 1.0
pitch = 1.0

# "openai/whisper" is not a model repository on the Hugging Face Hub, so
# from_pretrained() cannot resolve it. Whisper checkpoints are published per
# size ("openai/whisper-tiny", "-base", "-small", ...); the smallest one is
# used here to keep start-up time and memory low.
# NOTE(review): Whisper is a speech-to-text model, so these two objects cannot
# synthesize audio by themselves -- confirm whether a text-to-speech
# checkpoint was intended here.
WHISPER_CHECKPOINT = "openai/whisper-tiny"

# Initialize feature extractor and model from Hugging Face Transformers
feature_extractor = WhisperFeatureExtractor.from_pretrained(WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(WHISPER_CHECKPOINT)
15
+
16
+
17
# Text-to-speech checkpoints (MMS-TTS / VITS) for each voice code the UI
# offers. Unknown codes fall back to English in _get_tts().
_TTS_CHECKPOINTS = {
    "en": "facebook/mms-tts-eng",
    "fr": "facebook/mms-tts-fra",
    "es": "facebook/mms-tts-spa",
}

# Pipelines already loaded, keyed by voice code, so that a checkpoint is
# loaded once instead of on every keystroke.
_tts_pipelines = {}


def _key_label(key):
    """Return speakable text for a pynput key event.

    Character keys yield the character itself and special keys yield their
    name with underscores turned into spaces (e.g. "shift r"). Anything else
    falls back to the string form of the key with quotes stripped.
    """
    char = getattr(key, "char", None)
    if char:
        return char
    name = getattr(key, "name", None)
    if name:
        return name.replace("_", " ")
    return str(key).replace("'", "")


def _get_tts(voice_code):
    """Return the cached text-to-speech pipeline for *voice_code*."""
    if voice_code not in _tts_pipelines:
        checkpoint = _TTS_CHECKPOINTS.get(voice_code, _TTS_CHECKPOINTS["en"])
        _tts_pipelines[voice_code] = transformers.pipeline(
            "text-to-speech", model=checkpoint
        )
    return _tts_pipelines[voice_code]


def on_press(key):
    """Speak the pressed key; pynput calls this for every key press.

    The previous body passed text to a Whisper feature extractor (which
    expects audio), handed unsupported ``voice``/``speed``/``pitch`` keyword
    arguments to ``generate()`` and called a ``to_audio`` method that the
    model does not have, so it could not produce sound. Synthesis now goes
    through the ``transformers`` text-to-speech pipeline.

    Args:
        key: The pynput key object for the key that was pressed.

    Returns:
        ``False`` when Esc is pressed, which stops the listener; ``None``
        otherwise.
    """
    # Handle special keys (consider modifications for your needs)
    if key == keyboard.Key.esc:
        return False  # Exit keystroke listener

    text = _key_label(key)
    if not text:
        # Nothing speakable (e.g. a dead key without a character).
        return None

    tts = _get_tts(voice)
    # VITS models expose the speaking rate as a model attribute; larger
    # values speak faster. ``pitch`` has no counterpart in this model and is
    # therefore not applied.
    tts.model.speaking_rate = speed
    result = tts(text)

    # The pipeline may return the waveform with a leading batch axis;
    # squeeze it so soundfile writes a mono signal rather than one frame
    # with thousands of channels.
    waveform = result["audio"].squeeze()

    # Play the audio (replace with your preferred audio playback library)
    sf.write("output.wav", waveform, samplerate=result["sampling_rate"])
    st.audio("output.wav", format="audio/wav")
    return None
46
+
47
+
48
# Streamlit App

st.title("Text-to-Speech Keystroke Announcer")

# User Interface for customization options
voice_selected = st.selectbox("Voice", ["en", "fr", "es"])  # Add more options
speed_slider = st.slider("Speaking Speed", min_value=0.5, max_value=2.0, value=1.0)
pitch_slider = st.slider("Speaking Pitch", min_value=0.5, max_value=2.0, value=1.0)

# Update the module-level settings read by on_press() with the user's choices
voice = voice_selected
speed = speed_slider
pitch = pitch_slider

# Show the hint BEFORE the listener starts: listener.join() blocks this script
# run until on_press() returns False (Esc), so text written after it would
# only appear once keystroke detection had already stopped.
st.write("Press 'Esc' to stop keystroke detection.")

# Start keystroke listener on button press
if st.button("Start Keystroke Announcer"):
    with keyboard.Listener(on_press=on_press) as listener:
        listener.join()