Spaces:
Sleeping
Sleeping
File size: 2,221 Bytes
0455eb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import functools

import soundfile as sf
import streamlit as st
import torch
import transformers
from pynput import keyboard
from transformers import WhisperFeatureExtractor, WhisperForConditionalGeneration
# Speech settings (initial values). The Streamlit UI rebinds these names on
# every script run and on_press() reads them each time a key is pressed.
voice = "en"  # key into _TTS_CHECKPOINTS
speed = 1.0  # 1.0 = the model's native speaking rate
pitch = 1.0  # 1.0 = the model's native pitch

# Whisper is a speech-to-text model and cannot synthesise audio, and
# "openai/whisper" is not a published checkpoint id. The MMS-TTS (VITS)
# checkpoints are text-to-speech models served by the same `transformers`
# package, one checkpoint per language.
_TTS_CHECKPOINTS = {
    "en": "facebook/mms-tts-eng",
    "fr": "facebook/mms-tts-fra",
    "es": "facebook/mms-tts-spa",
}


@functools.lru_cache(maxsize=None)
def _load_tts(voice_code):
    """Return the ``(tokenizer, model)`` pair for *voice_code*, loading it once.

    Raises:
        ValueError: If *voice_code* has no entry in ``_TTS_CHECKPOINTS``.
    """
    try:
        checkpoint = _TTS_CHECKPOINTS[voice_code]
    except KeyError:
        raise ValueError(
            f"Unsupported voice {voice_code!r}; "
            f"expected one of {sorted(_TTS_CHECKPOINTS)}"
        ) from None
    tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
    tts_model = transformers.VitsModel.from_pretrained(checkpoint)
    tts_model.eval()  # inference only
    return tokenizer, tts_model


# Keep the original module-level names bound (now to the default voice's
# tokenizer and TTS model) so anything referring to them keeps resolving.
feature_extractor, model = _load_tts(voice)


def _key_to_text(key):
    """Return a speakable label for a pynput key event."""
    # Printable keys arrive as KeyCode objects carrying the typed character.
    char = getattr(key, "char", None)
    if char is not None:
        return char
    # Special keys are Key enum members; "page_down" reads better as "page down".
    name = getattr(key, "name", None)
    if name is not None:
        return name.replace("_", " ")
    # Anything else: fall back to the string form with quotes stripped.
    return str(key).replace("'", "")


def _synthesize(text, voice_code, rate, pitch_factor):
    """Return ``(waveform, sample_rate)`` for *text* spoken in *voice_code*.

    Args:
        text: Text to speak.
        voice_code: Key into ``_TTS_CHECKPOINTS``.
        rate: Speaking-speed multiplier; must be positive.
        pitch_factor: Pitch multiplier; must be positive.

    Raises:
        ValueError: If *rate* or *pitch_factor* is not positive, or the voice
            is unknown.
    """
    if rate <= 0 or pitch_factor <= 0:
        raise ValueError("speed and pitch must be positive")

    tokenizer, tts_model = _load_tts(voice_code)
    inputs = tokenizer(text, return_tensors="pt")

    # VITS exposes a speaking rate but no pitch control. Pitch is approximated
    # by resampling: synthesise at rate / pitch_factor, then label the file
    # with sampling_rate * pitch_factor. Playback is then pitch_factor times
    # faster and higher, so the net speed is `rate` and the pitch is scaled by
    # `pitch_factor` (formants shift along with it).
    tts_model.speaking_rate = rate / pitch_factor
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform

    sample_rate = int(round(tts_model.config.sampling_rate * pitch_factor))
    # The model returns a (batch, samples) tensor; drop the batch dimension.
    return waveform.squeeze(0).cpu().numpy(), sample_rate


def on_press(key):
    """Announce the pressed key as speech; pynput ``on_press`` callback.

    The current module-level ``voice``, ``speed`` and ``pitch`` are read on
    every call, the audio is written to ``output.wav`` and handed to
    ``st.audio``.

    Args:
        key: The ``keyboard.Key`` or ``keyboard.KeyCode`` delivered by pynput.

    Returns:
        ``False`` when Esc is pressed, which tells pynput to stop the
        listener; ``None`` otherwise.
    """
    if key == keyboard.Key.esc:
        return False  # Exit keystroke listener

    text = _key_to_text(key)
    # Control characters (e.g. Ctrl+C -> "\x03") have nothing to pronounce.
    if not text or not text.isprintable():
        return None

    audio_output, sample_rate = _synthesize(text, voice, speed, pitch)

    # Play the audio (replace with your preferred audio playback library)
    sf.write("output.wav", audio_output, samplerate=sample_rate)
    st.audio("output.wav", format="audio/wav")
    return None
# Streamlit App
st.title("Text-to-Speech Keystroke Announcer")

# User interface for customization options
voice_selected = st.selectbox("Voice", ["en", "fr", "es"])  # Add more options
speed_slider = st.slider("Speaking Speed", min_value=0.5, max_value=2.0, value=1.0)
pitch_slider = st.slider("Speaking Pitch", min_value=0.5, max_value=2.0, value=1.0)

# Rebind the module-level settings that on_press() reads on each key press.
voice = voice_selected
speed = speed_slider
pitch = pitch_slider

# Start keystroke listener on button press
if st.button("Start Keystroke Announcer"):
    # Show the hint BEFORE blocking: listener.join() does not return until the
    # listener stops, so writing it afterwards would only display the
    # instruction once listening had already ended.
    st.write("Press 'Esc' to stop keystroke detection.")

    # NOTE(review): pynput listens to the keyboard of the machine running this
    # script. On a hosted deployment that is the server, not the visitor's
    # browser -- confirm this matches the intended use.
    # NOTE(review): on_press runs on the listener's own thread; Streamlit
    # calls made there may need the script run context attached to that
    # thread before they render -- verify st.audio output actually appears.
    with keyboard.Listener(on_press=on_press) as listener:
        listener.join()