Spaces:

ccclllwww
/

Assignment_V1

Sleeping

App Files Files Community

Assignment_V1 / app.py

ccclllwww

Update app.py

4204a24 verified about 2 months ago

raw

history blame

10.8 kB

	# ======================================
	# Package Import
	# ======================================

	import html
	import time
	from typing import Tuple

	import soundfile as sf
	import streamlit as st
	import torch
	from datasets import load_dataset
	from PIL import Image
	from transformers import pipeline

	# ======================================
	# Basic Initialization
	# ======================================

	# Streamlit re-executes this script from the top on every widget
	# interaction, so plain module-level construction would rebuild every model
	# on each click. The loaders below are wrapped in st.cache_resource so the
	# heavy objects are created once and reused by later reruns.
	# show_spinner=False keeps the loaders from emitting any UI element before
	# st.set_page_config() is called further down.

	@st.cache_resource(show_spinner=False)
	def _load_image_caption_pipeline():
	    """Build the image-to-text pipeline used for captioning."""
	    return pipeline(
	        task="image-to-text",
	        model="cnmoro/tiny-image-captioning",
	    )


	@st.cache_resource(show_spinner=False)
	def _load_text_generation_pipeline():
	    """Build the text-generation pipeline used for story writing."""
	    return pipeline(
	        "text-generation",
	        model="Qwen/Qwen1.5-0.5B",
	        max_new_tokens=100,
	    )


	@st.cache_resource(show_spinner=False)
	def _load_speech_components():
	    """
	    Build the text-to-speech pipeline and its default speaker embedding.

	    Returns:
	        tuple: (speech pipeline, x-vector dataset, speaker embedding tensor
	        taken from dataset row 7306 with a leading axis added).
	    """
	    speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
	    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
	    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
	    return speech_pipeline, embeddings_dataset, speaker_embedding


	# Image captioning pipeline
	_image_caption_pipeline = _load_image_caption_pipeline()

	# Story text-generation pipeline
	_text_generation_pipeline = _load_text_generation_pipeline()

	# Text-to-speech pipeline, its speaker-embedding dataset and default voice
	_SPEECH_PIPELINE, _EMBEDDINGS_DATASET, _DEFAULT_SPEAKER_EMBEDDING = _load_speech_components()

	# ======================================
	# Function Definitions
	# ======================================

	def generate_image_caption(input_image):
	    """
	    Produce a caption for an image with the module-level captioning pipeline.

	    Args:
	        input_image (Union[PIL.Image.Image, str]): The picture to describe,
	            given either as a PIL Image object or as a filesystem path to
	            an image file.

	    Returns:
	        str: The 'generated_text' value of the first pipeline result.

	    Example:
	        >>> from PIL import Image
	        >>> img = Image.open("photo.jpg")
	        >>> caption = generate_image_caption(img)
	        >>> print(f"Caption: {caption}")
	    """
	    # The pipeline returns a list of result dictionaries; only the first is used.
	    first_result = _image_caption_pipeline(input_image)[0]
	    return first_result['generated_text']

	def generate_story_content(system_prompt: str, user_prompt: str) -> str:
	    """
	    Generate a story from a system prompt and a user prompt.

	    The two prompts are sent to the module-level text-generation pipeline as
	    a chat transcript. The content of the last message in the transcript the
	    pipeline returns is handed back as-is; no further cleaning is applied.

	    Args:
	        system_prompt: Defines the assistant's role and writing constraints
	        user_prompt: Describes the story scenario and specific elements to include

	    Returns:
	        Content of the final message of the generated conversation

	    Raises:
	        RuntimeError: If text generation or result extraction fails; the
	            original exception is chained as the cause

	    Example:
	        >>> story = generate_story_content(
	        ...     "You are a helpful children's author...",
	        ...     "Kids playing with dogs in a sunny meadow..."
	        ... )
	    """
	    # Prepare chat message structure
	    conversation_history = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": user_prompt},
	    ]

	    try:
	        generation_output = _text_generation_pipeline(conversation_history)

	        # Take the last message rather than a fixed position, so the lookup
	        # does not depend on how many messages were sent in.
	        generated_messages = generation_output[0]["generated_text"]
	        return generated_messages[-1]["content"]
	    except Exception as error:
	        raise RuntimeError(f"Story generation failed: {error}") from error

	def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
	    """
	    Synthesize speech for a story and write it to an audio file.

	    Args:
	        story_text: Story text to be spoken
	        output_path: Destination of the audio file (default: 'output.wav')

	    Returns:
	        The same path that was passed in as output_path

	    Raises:
	        ValueError: If story_text is not a string or is empty/whitespace-only
	        RuntimeError: If synthesis or writing the file fails

	    Example:
	        >>> generate_audio_from_story("Children playing in the park", "story_audio.wav")
	        'story_audio.wav'
	    """
	    # Reject anything that is not a string with visible content
	    has_usable_text = isinstance(story_text, str) and bool(story_text.strip())
	    if not has_usable_text:
	        raise ValueError("Input story text must be a non-empty string")

	    try:
	        # Synthesize with the module-level default speaker embedding
	        synthesis_options = {"speaker_embeddings": _DEFAULT_SPEAKER_EMBEDDING}
	        synthesized = _SPEECH_PIPELINE(story_text, forward_params=synthesis_options)

	        # Write the waveform at the sampling rate reported by the pipeline
	        waveform = synthesized["audio"]
	        sample_rate = synthesized["sampling_rate"]
	        sf.write(output_path, waveform, samplerate=sample_rate)
	    except Exception as error:
	        raise RuntimeError(f"Audio synthesis failed: {error!s}") from error

	    return output_path


	# ======================================
	# Page Configuration & Custom Styling
	# ======================================
	# Browser-tab and layout settings for the page
	_PAGE_SETTINGS = {
	    "page_title": "Magic Story Generator",
	    "page_icon": "🧚",
	    "layout": "centered",
	    "initial_sidebar_state": "collapsed",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# Stylesheet for the child-friendly look: prompt buttons, story container,
	# image caption and progress spinner
	_CUSTOM_CSS = """
	<style>
	/* Prompt buttons styling */
	.prompt-btn {
	background: #4CAF50 !important;
	border-radius: 15px !important;
	padding: 15px 30px !important;
	font-size: 1.1rem !important;
	margin: 10px;
	}

	/* Story container styling */
	.story-container {
	background: #FFF3E0;
	border-radius: 20px;
	padding: 25px;
	margin: 20px 0;
	box-shadow: 0 4px 8px rgba(0,0,0,0.1);
	}
	/* Image caption styling */
	.image-caption {
	border-left: 4px solid #4CAF50;
	padding-left: 1rem;
	font-size: 1.1rem;
	color: #2c3e50;
	margin: 1.5rem 0;
	}

	/* Progress spinner styling */
	.stSpinner > div {
	font-size: 1.2rem !important;
	color: #9C27B0 !important;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# ======================================
	# Main Application Interface
	# ======================================
	st.title("🧚 Welcome to Magic Story Maker!")

	# Step 1: the picture the story will be based on
	accepted_image_types = ["png", "jpg", "jpeg"]
	with st.container():
	    st.subheader("Step 1: Upload Your Picture")
	    uploaded_image = st.file_uploader(
	        "Choose an image...",
	        type=accepted_image_types,
	        label_visibility="collapsed",
	    )

	# Seed the confirmation flag when it is not yet present in session state
	if "confirmed" not in st.session_state:
	    st.session_state["confirmed"] = False

	# Main processing flow
	if uploaded_image is not None:
	    # Display uploaded image
	    with st.spinner("✨ Magical image processing..."):
	        image = Image.open(uploaded_image)
	        # NOTE(review): use_column_width is reported as deprecated in recent
	        # Streamlit releases - confirm the installed version before changing.
	        st.image(image, caption="Your Magical Image", use_column_width=True)

	    # Prompt selection section
	    with st.container():
	        st.subheader("Step 2: Choose Story Style")

	        # (label, tooltip, widget key, prompt name) for each style button
	        style_buttons = (
	            ("📚 Learning Story",
	             "Generate educational story with life lessons",
	             "edu_btn", "educational"),
	            ("🌠 Fantasy Adventure",
	             "Create magical adventure story",
	             "fantasy_btn", "adventure"),
	            ("🐻 Animal Friends",
	             "Make story about friendly animals",
	             "animal_btn", "animal"),
	        )

	        # One column per button; picking a style clears any earlier
	        # confirmation so the story is only generated after re-confirming.
	        button_columns = st.columns(len(style_buttons))
	        for column, (label, tooltip, widget_key, prompt_name) in zip(button_columns, style_buttons):
	            with column:
	                if st.button(label, help=tooltip, key=widget_key):
	                    st.session_state.selected_prompt = prompt_name
	                    st.session_state.confirmed = False

	    # Add confirmation button
	    with st.container():
	        st.subheader("Step 3: Confirm Selection")
	        if st.button("🔮 Start Magic Creation!",
	                     help="Click to generate story after choosing style",
	                     type="primary"):
	            st.session_state.confirmed = True

	    # Only show generation when confirmed
	    if st.session_state.get('confirmed', False):
	        # Generate image caption with loading state
	        with st.spinner("🔍 Analyzing image and generating description..."):
	            image_caption = generate_image_caption(image)

	        # Display caption results using CSS class. The caption is model
	        # output, so it is HTML-escaped before being placed into markup that
	        # is rendered with unsafe_allow_html=True.
	        st.subheader("📝 Image Understanding")
	        st.markdown(
	            f'<div class="story-container image-caption">{html.escape(image_caption)}</div>',
	            unsafe_allow_html=True)
	        st.write("")  # Add spacing

	        # Define prompt templates
	        PROMPT_TEMPLATES = {
	            "educational": {
	                "system": "You are a children's educator. Create a simple 100-word story that teaches basic life skills or moral lessons.",
	                "icon": "📚"
	            },
	            "adventure": {
	                "system": "You are a fantasy writer. Create a 100-word magical adventure story suitable for children.",
	                "icon": "🌠"
	            },
	            "animal": {
	                "system": "You are an animal expert. Create a 100-word story about friendly animals learning together.",
	                "icon": "🐻"
	            }
	        }

	        # Fall back to the educational style when no style button was pressed
	        selected_prompt = st.session_state.get("selected_prompt", "educational")
	        selected_template = PROMPT_TEMPLATES[selected_prompt]

	        # Story generation section
	        try:
	            with st.spinner(f"{selected_template['icon']} Creating your story..."):
	                # Generate story content using the caption
	                story_text = generate_story_content(
	                    system_prompt=selected_template["system"],
	                    user_prompt=image_caption
	                )
	        except RuntimeError as error:
	            # Show a readable message instead of an unhandled traceback
	            st.error(f"😢 The story could not be created: {error}")
	        else:
	            # Display formatted story (escaped for the same reason as the caption)
	            st.subheader("✨ Your Magical Story")
	            st.markdown(
	                f'<div class="story-container">{html.escape(story_text)}</div>',
	                unsafe_allow_html=True)

	            # Audio generation section
	            try:
	                with st.spinner("🔮 Preparing story narration..."):
	                    audio_file = generate_audio_from_story(story_text, "story_audio.wav")
	            except (ValueError, RuntimeError) as error:
	                # ValueError covers an empty story; RuntimeError a failed synthesis
	                st.error(f"😢 The story narration could not be created: {error}")
	            else:
	                st.subheader("🎧 Listen to Your Story")
	                st.audio(audio_file)
	    else:
	        # Show waiting message
	        st.info("ℹ️ Please select a story style and click the confirmation button to continue")

	# Help section
	# Horizontal rule followed by the usage instructions
	st.markdown("---")
	st.subheader("🌟 How to Use:")
	usage_steps = """
	1. Upload any picture (animals, nature, or people work best!)
	2. Choose your favorite story style
	3. Click the confirmation button
	4. Wait for image analysis to complete
	5. Enjoy your personalized story and audio!
	"""
	st.info(usage_steps)