"""Streamlit app that speaks every key pressed on the machine running it.

A ``pynput`` listener captures keystrokes on a background thread and hands
their names to the Streamlit script thread through a queue.  The script
thread turns each name into speech with a VITS text-to-speech checkpoint and
renders it with ``st.audio``.

NOTE: ``pynput`` hooks the keyboard of the host running this script, not the
browser that is viewing the page.
"""

import io
import queue

import soundfile as sf
import streamlit as st
import torch
import transformers
from pynput import keyboard
from transformers import (
    AutoTokenizer,
    VitsModel,
    # The Whisper names are kept from the original import list; nothing below
    # uses them because Whisper is a speech-recognition model and cannot
    # synthesize audio from text.
    WhisperFeatureExtractor,
    WhisperForConditionalGeneration,
)

# Text-to-speech checkpoint used for each entry of the "Voice" select box.
TTS_CHECKPOINTS = {
    "en": "facebook/mms-tts-eng",
    "fr": "facebook/mms-tts-fra",
    "es": "facebook/mms-tts-spa",
}

# How long the script thread waits for a key before checking that the
# listener thread is still alive.
_POLL_SECONDS = 0.25

# Queue item that tells the script thread the user pressed Esc.
_STOP = None

# Voice, speed, and pitch (initial values; overwritten by the widgets below).
voice = "en"
speed = 1.0
pitch = 1.0

# Key names travel from the listener thread to the script thread through this
# queue: Streamlit elements created on a thread without a script-run context
# are not rendered, so the listener must never call ``st.*`` itself.
_pending_keys = queue.Queue()


@st.cache_resource(show_spinner=False)
def load_tts(voice_code):
    """Return the ``(tokenizer, model)`` pair for ``voice_code``.

    The pair is cached by Streamlit so checkpoints are loaded once per process
    instead of on every script rerun.

    Raises:
        KeyError: if ``voice_code`` is not a key of ``TTS_CHECKPOINTS``.
    """
    checkpoint = TTS_CHECKPOINTS[voice_code]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    tts_model = VitsModel.from_pretrained(checkpoint)
    tts_model.eval()
    return tokenizer, tts_model


def _key_label(key):
    """Return a speakable name for a pynput key object."""
    char = getattr(key, "char", None)
    if char:
        # Printable key: announce the character itself.
        return char
    name = getattr(key, "name", None)
    if name:
        # Special key such as ``Key.shift_r``: announce "shift r".
        return name.replace("_", " ")
    # Fallback mirrors the original formatting of the key's string form.
    return str(key).replace("'", "")


def on_press(key):
    """Queue the pressed key for announcement; stop the listener on Esc.

    Runs on the pynput listener thread, so it only enqueues work.

    Returns:
        ``False`` when Esc was pressed (pynput stops the listener on a
        ``False`` return), otherwise ``None``.
    """
    if key == keyboard.Key.esc:
        _pending_keys.put(_STOP)
        return False  # Exit keystroke listener
    _pending_keys.put(_key_label(key))
    return None


def synthesize(text, voice_code, rate, pitch_factor):
    """Return WAV-encoded speech for ``text`` as ``bytes``.

    Args:
        text: what to say.
        voice_code: key of ``TTS_CHECKPOINTS``.
        rate: speaking-speed multiplier (1.0 is the model's default).
        pitch_factor: pitch multiplier (1.0 leaves pitch unchanged).
    """
    tokenizer, tts_model = load_tts(voice_code)
    inputs = tokenizer(text, return_tensors="pt")

    # The model has no pitch control.  Writing the samples with a sample rate
    # scaled by ``pitch_factor`` shifts the pitch but also shortens playback
    # by the same factor, so the speaking rate is divided by ``pitch_factor``
    # to keep the audible speed equal to ``rate``.
    # NOTE: the cached model object is shared, so the rate is set before
    # every synthesis rather than once.
    tts_model.speaking_rate = rate / pitch_factor
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform

    samples = waveform[0].cpu().numpy()
    playback_rate = int(round(tts_model.config.sampling_rate * pitch_factor))

    # Encode in memory instead of overwriting a file in the working directory.
    buffer = io.BytesIO()
    sf.write(buffer, samples, samplerate=playback_rate, format="WAV")
    return buffer.getvalue()


# Streamlit App
st.title("Text-to-Speech Keystroke Announcer")

# User interface for customization options
voice_selected = st.selectbox("Voice", list(TTS_CHECKPOINTS))
speed_slider = st.slider("Speaking Speed", min_value=0.5, max_value=2.0, value=1.0)
pitch_slider = st.slider("Speaking Pitch", min_value=0.5, max_value=2.0, value=1.0)

# Update variables based on user selections
voice = voice_selected
speed = speed_slider
pitch = pitch_slider

# Start keystroke listener on button press
if st.button("Start Keystroke Announcer"):
    # Show the hint before blocking so the user knows how to stop.
    st.write("Press 'Esc' to stop keystroke detection.")

    with st.spinner("Loading voice..."):
        load_tts(voice)

    # One placeholder that is overwritten per key, so the page does not grow
    # by one audio player for every keystroke.
    # NOTE(review): the player does not start on its own here; recent
    # Streamlit releases appear to offer an autoplay option - confirm before
    # relying on it.
    player = st.empty()

    with keyboard.Listener(on_press=on_press) as listener:
        while True:
            try:
                label = _pending_keys.get(timeout=_POLL_SECONDS)
            except queue.Empty:
                # Do not wait forever if the listener thread has died.
                if not listener.is_alive():
                    break
                continue
            if label is _STOP:
                break
            try:
                wav_bytes = synthesize(label, voice, speed, pitch)
            except (RuntimeError, ValueError) as err:
                # Presumably raised for keys with no speakable characters;
                # report it and keep announcing later keys.
                player.warning(f"Could not speak {label!r}: {err}")
                continue
            player.audio(wav_bytes, format="audio/wav")

    st.write("Keystroke detection stopped.")