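"""Scanalyze: a Gradio app that generates an AI-written report for an uploaded
medical scan, using Google's MedGemma model via the transformers
"image-text-to-text" pipeline."""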
import os

import gradio as gr
import torch
from huggingface_hub import login
from PIL import Image
from transformers import pipeline

# Authenticate with the Hugging Face Hub (MedGemma is a gated model);
# skip the login entirely when no token is configured.
hf_token = os.getenv("hf_token")
if hf_token:
    login(token=hf_token)

# model_id = "google/gemma-3n-E4B-it-litert-preview"
model_id = "google/medgemma-4b-it"

# Load the MedGemma pipeline. bfloat16 keeps memory usage down on GPU;
# inference falls back to CPU when CUDA is unavailable.
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device="cuda" if torch.cuda.is_available() else "cpu",
)


# Inference function: build a chat-style message list and run the pipeline.
def analyze_scan(image):
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are an expert radiologist."}],
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this medical scan image, giving a full, detailed report."},
                {"type": "image", "image": image},
            ],
        },
    ]
    output = pipe(text=messages, max_new_tokens=200)
    # The pipeline returns the full chat transcript; the last message is the model's reply.
    return output[0]["generated_text"][-1]["content"]


# Gradio Interface
demo = gr.Interface(
    fn=analyze_scan,
    inputs=gr.Image(type="pil", label="Upload Medical Scan"),
    outputs=gr.Textbox(label="Scanalyze Medical Scan Report"),
    title="Medical Scan Analyzer (MedGemma)",
    description="Upload a medical scan image to get an AI-generated diagnostic report using Google's MedGemma model.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
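
# --- Optional local smoke test (sketch) --------------------------------------
# A minimal example of calling analyze_scan() directly, without the Gradio UI,
# e.g. when debugging the pipeline. "sample_scan.png" is a hypothetical path
# used purely for illustration; any PIL-readable scan image works.
#
# from PIL import Image
# test_image = Image.open("sample_scan.png")
# print(analyze_scan(test_image))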