from transformers import AutoModelForCausalLM, LlamaTokenizer, pipeline as transformers_pipeline
from gtts import gTTS  # gTTS for text-to-speech (replaces the earlier KPipeline approach)
import gradio as gr
# Initialize the image-to-text pipeline for captioning the uploaded image
captionImage = transformers_pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Text-generation model setup
device = "cpu"
model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
# Load the model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    trust_remote_code=True,
)
# Use LlamaTokenizer for compatibility with this checkpoint
tokenizer = LlamaTokenizer.from_pretrained(model_id)
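# Note: AutoTokenizer.from_pretrained(model_id) is the more general choice and
# should also work here; LlamaTokenizer is kept as a deliberate compatibility pick.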

# Initialize the text-generation pipeline
generator = transformers_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    return_full_text=False,
    max_new_tokens=500,
    do_sample=False,
)
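# do_sample=False means greedy (deterministic) decoding, so the same caption
# always yields the same story; max_new_tokens=500 caps the story's length.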

# Function to generate a caption for the image
def Image_Caption(image):
    # The pipeline returns a list of dicts like [{'generated_text': ...}]
    caption = captionImage(image)
    return caption[0]['generated_text']

# Function to generate a story from the caption text
def Generate_story(textAbout):
    # Define the prompt as a string
    prompt = f'write a long story about {textAbout} that takes 3 min to read'
    # Generate the story using the generator pipeline
    story = generator(prompt)
    # Extract the generated text
    story = story[0]['generated_text']
    # Clean up the story: drop newlines and the 'arafed' artifact token
    # that BLIP captions sometimes introduce
    story = story.replace('\n', ' ').replace('arafed', ' ')
    return story

# Function to generate audio using gTTS
def Generate_audio(text, lang='en'):
    tts = gTTS(text=text, lang=lang, slow=False)  # Create a gTTS object
    audio_file = "output_audio.mp3"  # Save as MP3
    tts.save(audio_file)  # Write the audio file to disk
    return audio_file
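# Note: gTTS sends the text to Google's TTS web service, so audio generation
# requires an internet connection; Gradio's Audio output takes the MP3 filepath.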

# Main function: image -> caption -> story -> audio
def Mustalhim(image):
    caption = Image_Caption(image)
    story = Generate_story(caption)
    audio_file = Generate_audio(story)
    return audio_file

# Gradio interface wrapper
def gradio_interface(image):
    audio_file = Mustalhim(image)
    return audio_file

# Path to the example image
example_image = "Example.PNG"

# Create the Gradio app
app = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil"),
    outputs=gr.Audio(type="filepath"),
    title="Image to Audio Story",
    description="Upload an image, and the app will generate a story and convert it to audio.",
    examples=[[example_image]],
)

# Launch the app
app.launch()
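# To run locally: `python app.py`, then open the URL Gradio prints
# (http://127.0.0.1:7860 by default).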