Spaces:
Sleeping
Sleeping
import streamlit as st | |
from pynput import keyboard | |
import transformers | |
from transformers import WhisperFeatureExtractor, WhisperForConditionalGeneration | |
import soundfile as sf | |
# Defaults for voice, speed, and pitch; the Streamlit controls overwrite them.
voice = "en"  # language code, must be a key of TTS_CHECKPOINTS
speed = 1.0
pitch = 1.0

# Whisper is a speech-recognition model (audio in, text out) and cannot
# synthesize speech, so announcements use MMS-TTS (VITS) checkpoints instead:
# one checkpoint per language offered in the "Voice" selectbox.
TTS_CHECKPOINTS = {
    "en": "facebook/mms-tts-eng",
    "fr": "facebook/mms-tts-fra",
    "es": "facebook/mms-tts-spa",
}

# (tokenizer, model) pairs that are already loaded, keyed by checkpoint id.
_tts_cache = {}


def describe_key(key):
    """Return the text to announce for a pynput key event.

    Printable keys expose the typed character as ``key.char``; special keys
    are enum members exposing ``key.name`` (e.g. ``"page_down"``). Anything
    else falls back to the string form of the key with quotes removed.
    """
    char = getattr(key, "char", None)
    if char is not None:
        return char
    name = getattr(key, "name", None)
    if name is not None:
        # "page_down" reads better aloud as "page down".
        return name.replace("_", " ")
    return str(key).replace("'", "")


def _load_tts(language):
    """Return the cached ``(tokenizer, model)`` pair for *language*.

    Unknown language codes fall back to the English checkpoint. The
    checkpoint is downloaded/loaded on first use only.
    """
    checkpoint = TTS_CHECKPOINTS.get(language, TTS_CHECKPOINTS["en"])
    if checkpoint not in _tts_cache:
        tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
        tts_model = transformers.VitsModel.from_pretrained(checkpoint)
        _tts_cache[checkpoint] = (tokenizer, tts_model)
    return _tts_cache[checkpoint]


def _synthesize(text, language, rate):
    """Synthesize *text* and return ``(samples, sample_rate)``.

    *rate* is assigned to the model's ``speaking_rate`` (values above 1.0
    speak faster). ``samples`` is a 1-D NumPy array.
    """
    import torch  # local import: only needed once a key is announced

    tokenizer, tts_model = _load_tts(language)
    tts_model.speaking_rate = rate
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform
    # The waveform is batched; there is exactly one utterance per call.
    return waveform[0].cpu().numpy(), tts_model.config.sampling_rate


def on_press(key):
    """Announce a pressed key as speech; pynput ``on_press`` callback.

    Reads the module-level ``voice``, ``speed`` and ``pitch`` settings.

    Returns:
        ``False`` when Esc is pressed, which tells pynput to stop the
        listener; ``None`` otherwise.
    """
    if key == keyboard.Key.esc:
        return False  # Exit keystroke listener

    text = describe_key(key)
    if not text:
        return None

    # VITS has no pitch control. Pitch is approximated by declaring a scaled
    # sample rate for playback (which also scales tempo), and the tempo change
    # is cancelled by dividing the speaking rate by the same factor.
    samples, sample_rate = _synthesize(text, voice, speed / pitch)
    if samples.size == 0:
        # NOTE(review): presumably happens when the tokenizer drops every
        # character of the text - confirm against the checkpoint vocabulary.
        return None

    sf.write("output.wav", samples, samplerate=int(round(sample_rate * pitch)))
    # NOTE(review): this runs on pynput's listener thread; Streamlit calls
    # made from it likely need a script-run context attached - verify.
    st.audio("output.wav", format="audio/wav")
    return None
# Streamlit App
st.title("Text-to-Speech Keystroke Announcer")

# Customization controls; each widget returns the user's current selection.
voice_selected = st.selectbox("Voice", ["en", "fr", "es"])  # Add more options
speed_slider = st.slider("Speaking Speed", min_value=0.5, max_value=2.0, value=1.0)
pitch_slider = st.slider("Speaking Pitch", min_value=0.5, max_value=2.0, value=1.0)

# Publish the selections through the module-level names that on_press reads.
voice = voice_selected
speed = speed_slider
pitch = pitch_slider

# Start keystroke listener on button press
if st.button("Start Keystroke Announcer"):
    from streamlit.runtime.scriptrunner import add_script_run_ctx

    # Render the hint BEFORE blocking: join() below does not return until
    # the listener stops, so writing it afterwards would show it too late.
    st.write("Press 'Esc' to stop keystroke detection.")

    # pynput captures keys of the machine running this script, on its own
    # thread. Attaching the script-run context lets the st.audio calls made
    # inside on_press reach this session.
    listener = keyboard.Listener(on_press=on_press)
    add_script_run_ctx(listener)
    listener.start()
    try:
        listener.join()
    finally:
        # Release the keyboard hook even if on_press raised.
        listener.stop()