# Mustalhim_AI / app.py
# Author: MoJaff — commit "Create app.py" (20faed5, verified)
# (Hugging Face raw-file page chrome: raw / history / blame, 2.44 kB)
import torch

# Force CPU inference. NOTE(review): `device` is never passed to the model or
# pipeline below — transformers places the model itself; confirm this is intended.
device = "cpu"
model_id ="ALLaM-AI/ALLaM-7B-Instruct-preview"

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Download/load the 7B instruct model; dtype picked automatically from the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype="auto",
trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview")

# NOTE(review): `messages` is never used anywhere in this file — Generate_story()
# builds its own prompt. Looks like leftover scaffolding; candidate for removal.
messages = [
{"role": "user", "content": "write a long story that takes 3 min to read"}
]

# Greedy (do_sample=False) text-generation pipeline used by Generate_story();
# return_full_text=False so only the completion (not the prompt) is returned.
generator = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
return_full_text=False,
max_new_tokens=500,
do_sample=False
)
from kokoro import KPipeline
# NOTE(review): display/Audio are IPython-notebook helpers, unused in this Gradio app.
from IPython.display import display, Audio
import soundfile as sf

# WARNING(review): this rebinding shadows the transformers `pipeline` function
# imported above (which is why line 42 re-imports it as `transformers_pipeline`).
# This model-less (model=False) KPipeline instance is never used — Generate_audio()
# constructs its own pipeline on every call.
pipeline = KPipeline(lang_code='b', model=False)
import numpy as np
def Generate_audio(text, voice='bm_lewis', speed=1):
    """Synthesize *text* to speech with the kokoro TTS pipeline.

    Args:
        text: Text to speak; split into segments on blank-line boundaries.
        voice: kokoro voice identifier (default British male 'bm_lewis').
        speed: Playback speed multiplier.

    Returns:
        Tuple of (waveform as 1-D numpy array, sample rate in Hz).
        kokoro emits 24 kHz audio — TODO confirm against the kokoro docs.
    """
    # NOTE: builds a fresh pipeline per call (the module-level `pipeline`
    # instance was created with model=False and cannot synthesize).
    tts = KPipeline(lang_code='b')
    # Collect each synthesized segment as an array and join them once.
    # BUG FIX: the original extended a Python list sample-by-sample, converting
    # every numpy sample to a Python object; np.concatenate keeps the native
    # dtype and is a single C-level copy.
    segments = [
        np.asarray(audio)
        for _, _, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+')
    ]
    if not segments:
        # Empty input text: return an empty waveform instead of crashing
        # (np.concatenate raises on an empty sequence).
        return np.array([]), 24000
    return np.concatenate(segments), 24000
# Re-import under an alias because the bare name `pipeline` was shadowed by the
# kokoro KPipeline instance created above.
from transformers import pipeline as transformers_pipeline

# BLIP image-captioning pipeline used by Image_Caption().
captionImage = transformers_pipeline("image-to-text",
model="Salesforce/blip-image-captioning-large")
def Image_Caption(image):
    """Return the BLIP-generated caption string for *image*."""
    result = captionImage(image)
    return result[0]['generated_text']
def Generate_story(textAbout):
    """Generate a ~3-minute story about *textAbout* with the ALLaM pipeline.

    Args:
        textAbout: Subject of the story (typically an image caption).

    Returns:
        The generated story as a single line of text, ready for TTS.
    """
    # BUG FIX: the original had a trailing comma here, which silently wrapped
    # the message dict in a one-element tuple instead of the list-of-messages
    # the chat text-generation pipeline expects.
    prompt = [
        {"role": "user", "content": f'write a long story about {textAbout} that takes 3 min to read'}
    ]
    story = generator(prompt)[0]['generated_text']
    # Flatten newlines for the TTS step and scrub the BLIP caption artifact
    # token 'arafed' that sometimes leaks into the prompt/story.
    return story.replace('\n', ' ').replace('arafed', ' ')
def Mustalhim(image):
    """Full pipeline: caption *image*, expand it into a story, voice the story.

    Returns whatever Generate_audio returns: (waveform, sample_rate).
    """
    return Generate_audio(Generate_story(Image_Caption(image)))
def gradio_interface(image):
    """Gradio handler: take an uploaded image, write the story audio to a WAV
    file, and return its path for the Audio output component."""
    waveform, rate = Mustalhim(image)
    out_path = "output_audio.wav"
    sf.write(out_path, waveform, rate)
    return out_path
# BUG FIX: `gr` was used below but gradio was never imported anywhere in the
# file, so the app crashed with NameError before launching.
import gradio as gr

# Example shown in the UI; the file must ship alongside app.py.
example_image = "Example.PNG"

app = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil"),
    outputs=gr.Audio(type="filepath"),
    title="Image to Audio Story",
    description="Upload an image, and the app will generate a story and convert it to audio.",
    examples=[[example_image]],
)

# Launch the app
app.launch()