Spaces:

MoJaff
/

Mustalhim_AI

Running

App Files Community

MoJaff committed on Feb 26

Commit

8a3a311

verified ·

1 Parent(s): 90dfe38

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -68

app.py CHANGED Viewed

@@ -1,109 +1,86 @@
 import torch
 device = "cpu"
-model_id ="ALLaM-AI/ALLaM-7B-Instruct-preview"
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype="auto",
     trust_remote_code=True,
 )
-tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview")
-messages = [
-    {"role": "user", "content": "write a long story that takes 3 min to read"}
-]
-generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    return_full_text=False,
-    max_new_tokens=500,
-    do_sample=False
 )
-from kokoro import KPipeline
-from IPython.display import display, Audio
-import soundfile as sf
-pipeline = KPipeline(lang_code='b', model=False)
-import numpy as np
 def Generate_audio(text, voice='bm_lewis', speed=1):
     pipeline = KPipeline(lang_code='b')
     generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
     full_audio = []
     for _, _, audio in generator:
         full_audio.extend(audio)
     full_audio = np.array(full_audio)
     return full_audio, 24000
-from transformers import pipeline as transformers_pipeline
-captionImage = transformers_pipeline("image-to-text",
-                                 model="Salesforce/blip-image-captioning-large")
-def Image_Caption(image):
-    caption = captionImage(image)
-    caption = caption[0]['generated_text']
-    return caption
-def Generate_story(textAbout):
-  storyAbout =  {"role": "user", "content": f'write a long story about {textAbout} that takes 3 min to read'},
-  story = generator(storyAbout)
-  story = story[0]['generated_text']
-  story = story.replace('\n', ' ').replace('arafed', ' ')
-  return story
 def Mustalhim(image):
-  caption = Image_Caption(image)
-  story = Generate_story(caption)
-  audio = Generate_audio(story)
-  return audio
 def gradio_interface(image):
     audio_waveform, sampling_rate = Mustalhim(image)
     audio_file = "output_audio.wav"
     sf.write(audio_file, audio_waveform, sampling_rate)
     return audio_file
 example_image = "Example.PNG"
 app = gr.Interface(
-    fn=gradio_interface,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Audio(type="filepath"),
     title="Image to Audio Story",
     description="Upload an image, and the app will generate a story and convert it to audio.",
     examples=[[example_image]]
 )
 # Launch the app
-app.launch()

 import torch
+from transformers import AutoModelForCausalLM, LlamaTokenizer, pipeline as transformers_pipeline
+from kokoro import KPipeline
+import soundfile as sf
+import numpy as np
+import gradio as gr
+# Initialize the image-to-text pipeline
+captionImage = transformers_pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+# Initialize the text-generation pipeline
 device = "cpu"
+model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
+# Load the model
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype="auto",
     trust_remote_code=True,
 )
+# Use LlamaTokenizer for compatibility
+tokenizer = LlamaTokenizer.from_pretrained(model_id)
+# Initialize the text-generation pipeline
+generator = transformers_pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,
+    max_new_tokens=500,
+    do_sample=False,
 )
+# Function to generate caption
+def Image_Caption(image):
+    caption = captionImage(image)
+    return caption[0]['generated_text']
+# Function to generate a story
+def Generate_story(textAbout):
+    storyAbout = {"role": "user", "content": f'write a long story about {textAbout} that takes 3 min to read'}
+    story = generator(storyAbout)
+    story = story[0]['generated_text']
+    story = story.replace('\n', ' ').replace('arafed', ' ')
+    return story
+# Function to generate audio
 def Generate_audio(text, voice='bm_lewis', speed=1):
     pipeline = KPipeline(lang_code='b')
     generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
     full_audio = []
     for _, _, audio in generator:
         full_audio.extend(audio)
     full_audio = np.array(full_audio)
     return full_audio, 24000
+# Main function to process the image and generate audio
 def Mustalhim(image):
+    caption = Image_Caption(image)
+    story = Generate_story(caption)
+    audio = Generate_audio(story)
+    return audio
+# Gradio interface
 def gradio_interface(image):
     audio_waveform, sampling_rate = Mustalhim(image)
     audio_file = "output_audio.wav"
     sf.write(audio_file, audio_waveform, sampling_rate)
     return audio_file
+# Path to the example image
 example_image = "Example.PNG"
+# Create the Gradio app
 app = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.Audio(type="filepath"),
     title="Image to Audio Story",
     description="Upload an image, and the app will generate a story and convert it to audio.",
     examples=[[example_image]]
 )
 # Launch the app
+app.launch()