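"""Scanalyze: a Gradio app that generates an AI-written report for an uploaded
medical scan, using Google's MedGemma model via the transformers
"image-text-to-text" pipeline."""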
import os

import gradio as gr
import torch
from huggingface_hub import login
from PIL import Image
from transformers import pipeline

# Authenticate with the Hugging Face Hub (MedGemma is a gated model);
# skip the login entirely when no token is configured.
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)

# model_id = "google/gemma-3n-E4B-it-litert-preview"
model_id = "google/medgemma-4b-it"

# Load the MedGemma pipeline. bfloat16 keeps memory usage down on GPU;
# inference falls back to CPU when CUDA is unavailable.
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device="cuda" if torch.cuda.is_available() else "cpu",
)


# Inference function: build a chat-style message list and run the pipeline.
def analyze_scan(image):
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are an expert radiologist."}],
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this medical scan image, giving a full, detailed report."},
                {"type": "image", "image": image},
            ],
        },
    ]
    output = pipe(text=messages, max_new_tokens=200)
    # The pipeline returns the full chat transcript; the last message is the model's reply.
    return output[0]["generated_text"][-1]["content"]


# Gradio Interface
demo = gr.Interface(
    fn=analyze_scan,
    inputs=gr.Image(type="pil", label="Upload Medical Scan"),
    outputs=gr.Textbox(label="Scanalyze Medical Scan Report"),
    title="Medical Scan Analyzer (MedGemma)",
    description="Upload a medical scan image to get an AI-generated diagnostic report using Google's MedGemma model.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
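
# --- Optional local smoke test (sketch) --------------------------------------
# A minimal example of calling analyze_scan() directly, without the Gradio UI,
# e.g. when debugging the pipeline. "sample_scan.png" is a hypothetical path
# used purely for illustration; any PIL-readable scan image works.
#
# from PIL import Image
# test_image = Image.open("sample_scan.png")
# print(analyze_scan(test_image))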