khalednabawi11's picture
Update app.py
ec88703 verified
# import gradio as gr
# from transformers import BlipProcessor, BlipForConditionalGeneration
# from PIL import Image
# import torch
# # Load model and processor from your Hugging Face repo
# model_id = "khalednabawi11/blip-roco-model"
# processor = BlipProcessor.from_pretrained(model_id)
# model = BlipForConditionalGeneration.from_pretrained(model_id)
# model.eval()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# def generate_caption(image):
# # Preprocess
# inputs = processor(image, return_tensors="pt").to(device)
# # Generate caption
# with torch.no_grad():
# output = model.generate(**inputs, max_new_tokens=250, num_beams=5)
# # Decode
# caption = processor.decode(output[0], skip_special_tokens=True)
# return caption
# # def generate_caption(image):
# # prompt = "Radiology report:"
# # inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
# # output = model.generate(
# # **inputs,
# # max_length=250,
# # num_beams=3,
# # repetition_penalty=1.2,
# # length_penalty=0.0,
# # early_stopping=True,
# # # truncation=True
# # )
# # caption = processor.batch_decode(output, skip_special_tokens=True)[0]
# # return caption.strip()
# # Gradio UI
# demo = gr.Interface(
# fn=generate_caption,
# inputs=gr.Image(type="pil", label="Upload an Image"),
# outputs=gr.Textbox(label="Generated Caption"),
# title="BLIP Medical Caption Generator",
# description="Upload an image and get a caption generated by your fine-tuned BLIP model.",
# )
# if __name__ == "__main__":
# demo.launch()
# import os
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForVision2Seq, AutoModelForImageTextToText
# from PIL import Image
# import torch
# from huggingface_hub import login
# hf_token = os.getenv("hf_token")
# login(token=hf_token)
# processor = AutoProcessor.from_pretrained("google/medgemma-4b-it")
# model = AutoModelForImageTextToText.from_pretrained("google/medgemma-4b-it", device_map = "cpu")
# processor = AutoProcessor.from_pretrained("google/gemma-3n-E4B-it-litert-preview")
# model = AutoModelForImageTextToText.from_pretrained("google/gemma-3n-E4B-it-litert-preview", device_map = "cpu")
# model.eval()
# # Inference function
# def generate_caption(image, prompt):
# inputs = processor(images=image, text=prompt, return_tensors="pt")
# with torch.no_grad():
# outputs = model.generate(
# **inputs,
# max_new_tokens=256,
# num_beams=4,
# early_stopping=True
# )
# caption = processor.decode(outputs[0], skip_special_tokens=True)
# return caption.strip()
# # Gradio UI
# demo = gr.Interface(
# fn=generate_caption,
# inputs=[
# gr.Image(type="pil", label="Upload Medical Image"),
# gr.Textbox(label="Prompt", value="Radiology report:")
# ],
# outputs=gr.Textbox(label="Generated Caption"),
# title="Medical Scan Report Generator",
# description="Upload a medical image and enter a prompt (e.g. 'Radiology report:') to generate a diagnostic caption.",
# )
# if __name__ == "__main__":
# demo.launch()
import os
import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token stored in the
# "hf_token" environment variable. When the variable is unset or empty the
# explicit login is skipped: calling login() without a token falls back to an
# interactive prompt, which cannot be answered in a headless deployment.
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)
# model_id = "google/gemma-3n-E4B-it-litert-preview"
# Checkpoint served by this app.
model_id = "google/medgemma-4b-it"

# Build the image-text-to-text pipeline once at import time so that
# analyze_scan can reuse it on every call. Weights are requested in bfloat16;
# the pipeline is placed on the GPU when CUDA is available, else on the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="image-text-to-text",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device=_device,
)
# Inference function
def analyze_scan(image):
    """Generate a radiology-style report for an uploaded medical scan.

    Args:
        image: The scan supplied by the Gradio image input, or ``None`` when
            the form is submitted without an upload.

    Returns:
        The text content of the last message produced by the pipeline, or a
        short hint asking for an image when none was provided.
    """
    # An empty image field arrives as None; answer with a readable hint
    # instead of letting the pipeline fail on a missing image.
    if image is None:
        return "Please upload a medical scan image first."

    # Chat-style prompt: a system message setting the persona, then a user
    # turn carrying both the instruction text and the image itself.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are an expert radiologist."}],
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this Medical Scan Image Giving a full detailed report"},
                {"type": "image", "image": image},
            ],
        },
    ]

    output = pipe(text=messages, max_new_tokens=200)
    # "generated_text" holds the conversation as a list of messages; the last
    # entry is taken as the model's reply.
    return output[0]["generated_text"][-1]["content"]
# Gradio Interface
# Widgets for the single-function interface: one image upload in, one text
# box out.
scan_input = gr.Image(type="pil", label="Upload Medical Scan")
report_output = gr.Textbox(label="Scanalyze Medical Scan Report")

# Wire analyze_scan to the widgets; flagging is turned off.
demo = gr.Interface(
    fn=analyze_scan,
    inputs=scan_input,
    outputs=report_output,
    title="Medical Scan Analyzer (MedGemma)",
    description="Upload a Medical Scan image to get an AI-generated diagnostic report using Google's MedGemma model.",
    allow_flagging="never",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()