# ======================================
# Package Import
# ======================================
import re
import time

import streamlit as st
from PIL import Image
from transformers import pipeline


# ======================================
# Basic Initialization
# ======================================
# Streamlit re-executes this script on every user interaction, so the
# pipelines are built inside ``st.cache_resource`` loaders: the models are
# constructed once per server process and reused on later reruns.
# ``show_spinner=False`` keeps the loaders from emitting any UI element
# before ``st.set_page_config`` runs in ``main``.
@st.cache_resource(show_spinner=False)
def _load_image_caption_pipeline():
    """Build the image-captioning pipeline."""
    return pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Build the story text-generation pipeline."""
    # NOTE(review): 100 new tokens is likely fewer than the 100 words the
    # story prompt asks for -- confirm whether truncated stories are OK.
    return pipeline(
        "text-generation",
        model="Qwen/Qwen3-0.6B",
        max_new_tokens=100,
    )


@st.cache_resource(show_spinner=False)
def _load_speech_pipeline():
    """Build the text-to-speech pipeline."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


# Module-level handles used by the functions below.
_image_caption_pipeline = _load_image_caption_pipeline()
_text_generation_pipeline = _load_text_generation_pipeline()
_SPEECH_PIPELINE = _load_speech_pipeline()

# Matches a ``<think>...</think>`` block (possibly empty) in a model reply.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)


# ======================================
# Function settings
# ======================================
def generate_image_caption(input_image) -> str:
    """
    Generate a textual description for an input image using a pretrained model.

    Args:
        input_image (Union[PIL.Image.Image, str]): Image to process. Can be either:
            - A PIL Image object
            - A string containing a filesystem path to an image file

    Returns:
        str: Generated caption text in natural language

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # Process image through the captioning pipeline
    inference_results = _image_caption_pipeline(input_image)

    # Extract text from the first result dictionary
    return inference_results[0]["generated_text"]


def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a story from the provided system and user prompts.

    Both prompts are concatenated into a single user message, followed by
    the ``/no_think`` switch. Any ``<think>...</think>`` block in the reply
    is removed before the text is returned.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        Generated story text without any thinking-process markup, with
        surrounding whitespace stripped

    Raises:
        RuntimeError: If text generation fails at any stage

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    try:
        # Prepare chat message structure. The space before the switch keeps
        # it from being glued onto the last word of the prompt.
        conversation_history = [
            {
                "role": "user",
                "content": f"{system_prompt}{user_prompt} /no_think",
            },
        ]

        # Generate the story
        generation = _text_generation_pipeline(conversation_history)

        # The pipeline returns the conversation with the new reply appended,
        # so the assistant message is the last entry.
        reply = generation[0]["generated_text"][-1]["content"]

        # Remove the think block only if it is present, instead of assuming
        # a fixed-length prefix and slicing it off blindly.
        return _THINK_BLOCK_RE.sub("", reply).strip()
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {str(error)}") from error


def generate_audio_from_story(story_text: str) -> dict:
    """
    Convert story text to speech using the text-to-speech pipeline.

    Args:
        story_text: Input story text to synthesize

    Returns:
        The raw output of the speech pipeline. ``main`` reads its
        ``"audio"`` and ``"sampling_rate"`` entries for playback.

    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails

    Example:
        >>> speech = generate_audio_from_story("Children playing in the park")
        >>> waveform, rate = speech["audio"], speech["sampling_rate"]
    """
    # Validate input text
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")

    try:
        # Generate speech
        return _SPEECH_PIPELINE(story_text)
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error


# ======================================
# Main Application Interface
# ======================================
def _render_generated_outputs(image) -> None:
    """Run caption, story and speech generation and display each result."""
    st.subheader("🔍 Scene Description")
    with st.spinner("Preparing story caption..."):
        caption = generate_image_caption(image)
    st.write(caption)

    st.subheader("📖 Generated Story")
    with st.spinner("Preparing story..."):
        sys_prompt = "You are a fantasy writer. Create a 100-word adventure story about "
        story = generate_story_content(sys_prompt, caption)
    st.write(story)

    st.subheader("🔊 Audio Playback")
    with st.spinner("Preparing speech..."):
        speech = generate_audio_from_story(story)
    st.audio(
        speech["audio"],
        sample_rate=speech["sampling_rate"],
        format='audio/wav',
    )


def main():
    """Main application interface for Streamlit"""
    # Page configuration
    st.set_page_config(
        page_title="Fantasy Adventure Generator",
        layout="wide",
        initial_sidebar_state="collapsed"
    )

    # Title and description
    st.title("🧙‍♂️ Fantasy Adventure Story Generator")
    st.markdown("""
    Upload an image and get:
    - Automatic scene description
    - AI-generated adventure story
    - Audio version of the story
    """)

    # Help section
    st.markdown("---")
    st.subheader("🌟 How to Use:")
    st.info("""
    1. Upload any picture (animals, nature, or people work best!)
    2. Choose your favorite story style
    3. Click the confirmation button
    4. Wait for image analysis to complete
    5. Enjoy your personalized story and audio!
    """)

    # File uploader
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # Process image
    image = Image.open(uploaded_file).convert("RGB")

    # Layout columns
    col1, col2 = st.columns(2)
    with col1:
        # NOTE(review): `use_column_width` is deprecated in recent Streamlit
        # releases; kept as-is for compatibility -- confirm the target
        # Streamlit version before switching to its replacement.
        st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generation button
    if st.button("✨ Generate Story & Audio"):
        with st.spinner("Processing your request..."):
            # Generate outputs and display results
            with col2:
                try:
                    _render_generated_outputs(image)
                except (RuntimeError, ValueError) as error:
                    # Show generation failures as a readable message rather
                    # than an unhandled traceback.
                    st.error(str(error))


if __name__ == "__main__":
    main()