# ======================================
# Package Import
# ======================================
import re
import time

import streamlit as st
from PIL import Image
from transformers import pipeline

# ======================================
# Basic Initialization
# ======================================


@st.cache_resource(show_spinner=False)
def _load_pipeline(task: str, model: str):
    """
    Build a Hugging Face pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script from the top on every widget
    interaction, so a bare module-level ``pipeline(...)`` call would reload
    the model each time. ``st.cache_resource`` keeps one instance per
    distinct ``(task, model)`` pair. ``show_spinner=False`` keeps the loader
    from drawing UI before ``st.set_page_config`` runs further down.

    Args:
        task: Pipeline task identifier passed to ``transformers.pipeline``
        model: Model identifier passed to ``transformers.pipeline``

    Returns:
        The pipeline object created by ``transformers.pipeline``
    """
    return pipeline(task=task, model=model)


# Image captioning pipeline with pretrained model
_image_caption_pipeline = _load_pipeline(
    "image-to-text", "cnmoro/tiny-image-captioning"
)

# Text generation pipeline used for story writing
_text_generation_pipeline = _load_pipeline("text-generation", "Qwen/Qwen3-1.7B")

# Text-to-speech pipeline
_SPEECH_PIPELINE = _load_pipeline("text-to-speech", "facebook/mms-tts-eng")

# Matches a leading "<think>...</think>" block (plus surrounding whitespace)
# at the very start of a model reply.
_THINK_BLOCK_RE = re.compile(r"^\s*<think>.*?</think>\s*", re.DOTALL)


# ======================================
# Function settings
# ======================================
def generate_image_caption(input_image) -> str:
    """
    Generate a textual description for an input image using a pretrained model.

    Args:
        input_image (Union[PIL.Image.Image, str]): Image to process. Can be either:
            - A PIL Image object
            - A string containing a filesystem path to an image file

    Returns:
        str: Generated caption text in natural language

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # Process image through the captioning pipeline
    inference_results = _image_caption_pipeline(input_image)

    # Extract text from the first result dictionary
    return inference_results[0]['generated_text']


def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generates a children's story based on provided system and user prompts.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        Generated story text without any thinking process metadata

    Raises:
        RuntimeError: If text generation fails at any stage

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    try:
        # Both prompts travel in a single user message. The "/no_think"
        # suffix is presumably the Qwen3 switch that disables the reasoning
        # trace - confirm against the model card.
        conversation_history = [
            {"role": "user", "content": system_prompt+user_prompt+"/no_think"},
        ]

        # Generate the story
        generation = _text_generation_pipeline(conversation_history)

        # The pipeline returns the conversation with the reply appended, so
        # the reply is the last message whatever the number of inputs.
        reply = generation[0]["generated_text"][-1]["content"]

        # Drop a leading <think>...</think> block only if one is present;
        # a fixed-length slice would eat real story text when it is absent.
        return _THINK_BLOCK_RE.sub("", reply, count=1)

    except Exception as error:
        raise RuntimeError(f"Story generation failed: {str(error)}") from error


def generate_audio_from_story(story_text: str) -> dict:
    """
    Convert text story to speech using text-to-speech synthesis.

    Args:
        story_text: Input story text to synthesize

    Returns:
        The raw output of the text-to-speech pipeline, returned unchanged.
        No file is written. Per the transformers documentation this is a
        dict holding the waveform under "audio" and its rate under
        "sampling_rate" - verify against the installed version.

    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails

    Example:
        >>> speech = generate_audio_from_story("Children playing in the park")
    """
    # Validate input text
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")

    try:
        # Generate speech
        return _SPEECH_PIPELINE(story_text)

    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error


# ======================================
# Page Configuration & Custom Styling
# ======================================
st.set_page_config(
    page_title="Magic Story Generator",
    # Restored from the mis-decoded bytes of the mage emoji; the garbled
    # two-character string is not a valid emoji icon.
    page_icon="🧙",
    layout="centered",
    initial_sidebar_state="collapsed"
)

# Custom CSS styling for child-friendly interface
# NOTE(review): the markdown payload is blank here - confirm whether a
# <style> block is supposed to be in this string.
st.markdown(""" """, unsafe_allow_html=True)

#
====================================== # Main Application Interface # ====================================== st.title("đ§ Welcome to Magic Story Maker!") # File upload section with st.container(): st.subheader("Step 1: Upload Your Picture") uploaded_image = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"], label_visibility="collapsed") # Initialize session state for confirmation status if 'confirmed' not in st.session_state: st.session_state.confirmed = False # Main processing flow if uploaded_image is not None: # Display uploaded image with st.spinner("⨠Magical image processing..."): image = Image.open(uploaded_image) st.image(image, caption="Your Magical Image", use_column_width=True) # Prompt selection section with st.container(): st.subheader("Step 2: Choose Story Style") # Create three columns for prompt buttons col1, col2, col3 = st.columns(3) with col1: if st.button("đ Learning Story", help="Generate educational story with life lessons", key="edu_btn"): st.session_state.selected_prompt = "educational" st.session_state.confirmed = False with col2: if st.button("đ Fantasy Adventure", help="Create magical adventure story", key="fantasy_btn"): st.session_state.selected_prompt = "adventure" st.session_state.confirmed = False with col3: if st.button("đģ Animal Friends", help="Make story about friendly animals", key="animal_btn"): st.session_state.selected_prompt = "animal" st.session_state.confirmed = False # Add confirmation button with st.container(): st.subheader("Step 3: Confirm Selection") if st.button("đŽ Start Magic Creation!", help="Click to generate story after choosing style", type="primary"): st.session_state.confirmed = True # Only show generation when confirmed if st.session_state.get('confirmed', False): # Generate image caption with loading state with st.spinner("đ Analyzing image and generating description..."): image_caption = generate_image_caption(image) # Display caption results using CSS class st.subheader("đ Image 
Understanding") st.markdown(f'