# import gradio as gr
# from transformers import BlipProcessor, BlipForConditionalGeneration
# from PIL import Image
# import torch

# # Load model and processor from your Hugging Face repo
# model_id = "khalednabawi11/blip-roco-model"
# processor = BlipProcessor.from_pretrained(model_id)
# model = BlipForConditionalGeneration.from_pretrained(model_id)
# model.eval()

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# def generate_caption(image):
#     # Preprocess
#     inputs = processor(image, return_tensors="pt").to(device)
#     # Generate caption
#     with torch.no_grad():
#         output = model.generate(**inputs, max_new_tokens=250, num_beams=5)
#     # Decode
#     caption = processor.decode(output[0], skip_special_tokens=True)
#     return caption

# # def generate_caption(image):
# #     prompt = "Radiology report:"
# #     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
# #     output = model.generate(
# #         **inputs,
# #         max_length=250,
# #         num_beams=3,
# #         repetition_penalty=1.2,
# #         length_penalty=0.0,
# #         early_stopping=True,
# #         # truncation=True
# #     )
# #     caption = processor.batch_decode(output, skip_special_tokens=True)[0]
# #     return caption.strip()

# # Gradio UI
# demo = gr.Interface(
#     fn=generate_caption,
#     inputs=gr.Image(type="pil", label="Upload an Image"),
#     outputs=gr.Textbox(label="Generated Caption"),
#     title="BLIP Medical Caption Generator",
#     description="Upload an image and get a caption generated by your fine-tuned BLIP model.",
# )

# if __name__ == "__main__":
#     demo.launch()
# import os
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForImageTextToText
# from PIL import Image
# import torch
# from huggingface_hub import login

# hf_token = os.getenv("hf_token")
# login(token=hf_token)

# processor = AutoProcessor.from_pretrained("google/medgemma-4b-it")
# model = AutoModelForImageTextToText.from_pretrained("google/medgemma-4b-it", device_map="cpu")
# # Alternative model that was also tried (would overwrite the pair above if left in):
# # processor = AutoProcessor.from_pretrained("google/gemma-3n-E4B-it-litert-preview")
# # model = AutoModelForImageTextToText.from_pretrained("google/gemma-3n-E4B-it-litert-preview", device_map="cpu")
# model.eval()

# # Inference function
# def generate_caption(image, prompt):
#     inputs = processor(images=image, text=prompt, return_tensors="pt")
#     with torch.no_grad():
#         outputs = model.generate(
#             **inputs,
#             max_new_tokens=256,
#             num_beams=4,
#             early_stopping=True
#         )
#     caption = processor.decode(outputs[0], skip_special_tokens=True)
#     return caption.strip()

# # Gradio UI
# demo = gr.Interface(
#     fn=generate_caption,
#     inputs=[
#         gr.Image(type="pil", label="Upload Medical Image"),
#         gr.Textbox(label="Prompt", value="Radiology report:")
#     ],
#     outputs=gr.Textbox(label="Generated Caption"),
#     title="Medical Scan Report Generator",
#     description="Upload a medical image and enter a prompt (e.g. 'Radiology report:') to generate a diagnostic caption.",
# )

# if __name__ == "__main__":
#     demo.launch()
import os
import torch
from transformers import pipeline
from PIL import Image
import gradio as gr
from huggingface_hub import login

# Authenticate with the Hub (the token is read from a Space secret / env var);
# skip login entirely if no token is set, so local runs don't hit an interactive prompt.
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)

# model_id = "google/gemma-3n-E4B-it-litert-preview"
model_id = "google/medgemma-4b-it"

# Load the MedGemma pipeline
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device="cuda" if torch.cuda.is_available() else "cpu",
)
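
# A minimal alternative sketch (an assumption, not part of the original app):
# bfloat16 can be slow or poorly supported on CPU-only hosts, so the dtype
# could be derived from the device instead of hard-coded.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# dtype = torch.bfloat16 if device == "cuda" else torch.float32
# pipe = pipeline("image-text-to-text", model=model_id, torch_dtype=dtype, device=device)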
# Inference function
def analyze_scan(image):
    # Chat-style prompt: the system turn sets the radiologist persona, and the
    # user turn carries the instruction plus the uploaded image.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are an expert radiologist."}]
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this medical scan image, giving a full, detailed report."},
                {"type": "image", "image": image},
            ]
        }
    ]
    output = pipe(text=messages, max_new_tokens=200)
    # The pipeline returns the whole conversation; the generated report is the
    # content of the final (assistant) turn.
    return output[0]["generated_text"][-1]["content"]
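
# Hypothetical safeguard (not in the original app): gr.Image(type="pil") can
# hand back RGBA or grayscale images, so a caller could normalize to RGB
# before passing the image to analyze_scan.
# def to_rgb(image: Image.Image) -> Image.Image:
#     return image if image.mode == "RGB" else image.convert("RGB")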
# Gradio Interface
demo = gr.Interface(
    fn=analyze_scan,
    inputs=gr.Image(type="pil", label="Upload Medical Scan"),
    outputs=gr.Textbox(label="Scanalyze Medical Scan Report"),
    title="Medical Scan Analyzer (MedGemma)",
    description="Upload a medical scan image to get an AI-generated diagnostic report using Google's MedGemma model.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
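
# Quick offline smoke test (a hypothetical usage sketch; "sample_scan.png" is
# an assumed file name, not part of the app):
# print(analyze_scan(Image.open("sample_scan.png").convert("RGB")))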