Spaces:

abaryan
/

BioXP-0.5b-v2

Running

App Files Files Community

Abaryan committed 9 days ago

Commit

1f15859

verified ·

1 Parent(s): a437b0a

Update app.py

Browse files

Files changed (1) hide show

app.py +215 -51

app.py CHANGED Viewed

@@ -1,64 +1,228 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-if __name__ == "__main__":
-    demo.launch()

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import torch
+from transformers import AutoModelForMultipleChoice, AutoTokenizer
+import os
+from datasets import load_dataset
+import random
+from typing import Optional, List
 import gradio as gr
+app = FastAPI()
+# CORS policy for the JSON API endpoints.
+# Credentials stay disabled: combining wildcard origins with
+# allow_credentials=True would let any website issue cookie-authenticated
+# requests against this app, and browsers refuse a literal "*" origin on
+# credentialed responses anyway. List explicit origins here if cookies or
+# auth headers are ever needed cross-origin.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Define input models
+class QuestionRequest(BaseModel):
+    """Request body accepted by the POST /predict endpoint."""
+    question: str  # Question text; combined with each option before tokenization
+    options: list[str]  # List of 4 options
+class DatasetQuestion(BaseModel):
+    """One dataset record as returned by GET /dataset/question."""
+    question: str
+    opa: str  # Text of option A
+    opb: str  # Text of option B
+    opc: str  # Text of option C
+    opd: str  # Text of option D
+    cop: Optional[int] = None  # Correct option (0-3)
+    exp: Optional[str] = None  # Explanation if available
+# Global variables
+# Assigned by load_model(). None means "not loaded yet"; the request handlers
+# and the /health endpoint test these with `is None` / `is not None`.
+model = None
+tokenizer = None
+dataset = None
+def load_model():
+    """Load the model, tokenizer and MedMCQA dataset into the module globals.
+
+    The checkpoint name is read from the ``BioXP-0.5b`` environment variable
+    and falls back to ``rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B``. The model is
+    moved to CUDA when available, otherwise CPU, and switched to eval mode.
+
+    Everything is loaded into locals first and published to the globals only
+    after every step has succeeded, so ``model``/``tokenizer``/``dataset``
+    are never left half-initialised (for example a model without a tokenizer).
+
+    Raises:
+        RuntimeError: if any loading step fails; the original exception is
+            chained as ``__cause__`` so its traceback is preserved.
+    """
+    global model, tokenizer, dataset
+    try:
+        # NOTE(review): the default checkpoint name suggests a Qwen2.5 model;
+        # confirm AutoModelForMultipleChoice supports that architecture.
+        model_name = os.getenv("BioXP-0.5b", "rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B")
+        loaded_model = AutoModelForMultipleChoice.from_pretrained(model_name)
+        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # Load MedMCQA dataset
+        loaded_dataset = load_dataset("openlifescienceai/medmcqa")
+        # Move model to GPU if available
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        loaded_model = loaded_model.to(device)
+        loaded_model.eval()
+    except Exception as e:
+        # Chain the cause instead of flattening it into a bare Exception.
+        raise RuntimeError(f"Error loading model: {str(e)}") from e
+    # Publish only fully-initialised objects.
+    model, tokenizer, dataset = loaded_model, loaded_tokenizer, loaded_dataset
+def predict_gradio(question: str, option_a: str, option_b: str, option_c: str, option_d: str):
+    """Score four answer options for a question and format the result as text.
+
+    Each option is appended to the question, the four strings are tokenized
+    together, and the model's logits are turned into per-option probabilities.
+
+    Returns:
+        A multi-line string with the predicted option followed by one
+        "option: percentage" line per option, or a string starting with
+        "Error: " if the model is not loaded or inference fails. Errors are
+        returned rather than raised so the UI can display them.
+    """
+    if model is None or tokenizer is None:
+        return "Error: Model not loaded"
+    try:
+        options = [option_a, option_b, option_c, option_d]
+        inputs = [f"{question} {option}" for option in options]
+        encodings = tokenizer(
+            inputs,
+            padding=True,
+            truncation=True,
+            max_length=512,
+            return_tensors="pt"
+        )
+        device = next(model.parameters()).device
+        # Multiple-choice models expect (batch, num_choices, seq_len). The
+        # tokenizer returns (num_choices, seq_len), so add a batch axis of 1;
+        # without it the logits do not have one row of four scores.
+        encodings = {k: v.unsqueeze(0).to(device) for k, v in encodings.items()}
+        with torch.no_grad():
+            outputs = model(**encodings)
+            logits = outputs.logits
+            probabilities = torch.softmax(logits, dim=1)[0].tolist()
+            predicted_class = torch.argmax(logits, dim=1).item()
+        # Format the output for Gradio
+        parts = [
+            f"Predicted Answer: {options[predicted_class]}\n\n",
+            "Confidence Scores:\n",
+        ]
+        parts.extend(f"{opt}: {prob:.2%}\n" for opt, prob in zip(options, probabilities))
+        return "".join(parts)
+    except Exception as e:
+        return f"Error: {str(e)}"
+def get_random_question():
+    """Pick one random record from the dataset's train split for the form.
+
+    Returns a 5-tuple: question text followed by options A, B, C and D.
+    If the dataset has not been loaded, the first element holds an error
+    message and the four option slots are empty strings.
+    """
+    if dataset is None:
+        return "Error: Dataset not loaded", "", "", "", ""
+    train_split = dataset['train']
+    record = train_split[random.randint(0, len(train_split) - 1)]
+    field_names = ('question', 'opa', 'opb', 'opc', 'opd')
+    return tuple(record[name] for name in field_names)
+# Create Gradio interface
+# One column holding the question box, four option boxes, a row with the two
+# action buttons, and the prediction output box.
+with gr.Blocks(title="Medical MCQ Predictor") as demo:
+    gr.Markdown("# Medical MCQ Predictor")
+    gr.Markdown("Enter a medical question and its options, or get a random question from MedMCQA dataset.")
+    with gr.Row():
+        with gr.Column():
+            question = gr.Textbox(label="Question", lines=3)
+            option_a = gr.Textbox(label="Option A")
+            option_b = gr.Textbox(label="Option B")
+            option_c = gr.Textbox(label="Option C")
+            option_d = gr.Textbox(label="Option D")
+            with gr.Row():
+                predict_btn = gr.Button("Predict")
+                random_btn = gr.Button("Get Random Question")
+            output = gr.Textbox(label="Prediction", lines=5)
+    # "Predict": pass the five text fields to predict_gradio and show the
+    # string it returns in the output box.
+    predict_btn.click(
+        fn=predict_gradio,
+        inputs=[question, option_a, option_b, option_c, option_d],
+        outputs=output
+    )
+    # "Get Random Question": overwrite the five input fields with the tuple
+    # returned by get_random_question (same order as the outputs list).
+    random_btn.click(
+        fn=get_random_question,
+        inputs=[],
+        outputs=[question, option_a, option_b, option_c, option_d]
+    )
+@app.on_event("startup")
+async def startup_event():
+    """Run load_model() once when the application starts."""
+    load_model()
+@app.get("/dataset/question")
+async def get_dataset_question(index: Optional[int] = None, random_question: bool = False):
+    """Get a question from the MedMCQA dataset.
+
+    Args:
+        index: Position of the record in the train split. Ignored when
+            ``random_question`` is true.
+        random_question: When true, pick a random record instead.
+
+    Returns:
+        A ``DatasetQuestion`` built from the selected record.
+
+    Raises:
+        HTTPException: 400 if neither ``index`` nor ``random_question`` is
+            given, 404 if ``index`` is out of range, 500 if the dataset is
+            not loaded or any other error occurs.
+    """
+    try:
+        if dataset is None:
+            raise HTTPException(status_code=500, detail="Dataset not loaded")
+        train_split = dataset['train']
+        if random_question:
+            index = random.randint(0, len(train_split) - 1)
+        elif index is None:
+            raise HTTPException(status_code=400, detail="Either index or random_question must be provided")
+        try:
+            question_data = train_split[index]
+        except IndexError:
+            # A bad index is a client error, not a server failure.
+            raise HTTPException(status_code=404, detail=f"No question at index {index}") from None
+        return DatasetQuestion(
+            question=question_data['question'],
+            opa=question_data['opa'],
+            opb=question_data['opb'],
+            opc=question_data['opc'],
+            opd=question_data['opd'],
+            cop=question_data['cop'] if 'cop' in question_data else None,
+            exp=question_data['exp'] if 'exp' in question_data else None
+        )
+    except HTTPException:
+        # Let deliberate HTTP errors through with their own status code;
+        # the generic handler below would otherwise turn the 400/404 into 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.post("/predict")
+async def predict(request: QuestionRequest):
+    """Score the four options of a question and return the prediction as JSON.
+
+    Returns:
+        A dict with ``predicted_option`` (option text), ``option_index``,
+        ``confidence`` (probability of the predicted option) and
+        ``probabilities`` keyed ``option_0`` .. ``option_3``.
+
+    Raises:
+        HTTPException: 400 unless exactly 4 options are supplied, 500 if the
+            model is not loaded or inference fails.
+    """
+    if len(request.options) != 4:
+        raise HTTPException(status_code=400, detail="Exactly 4 options are required")
+    if model is None or tokenizer is None:
+        raise HTTPException(status_code=500, detail="Model not loaded")
+    try:
+        inputs = [f"{request.question} {option}" for option in request.options]
+        encodings = tokenizer(
+            inputs,
+            padding=True,
+            truncation=True,
+            max_length=512,
+            return_tensors="pt"
+        )
+        device = next(model.parameters()).device
+        # Multiple-choice models expect (batch, num_choices, seq_len). The
+        # tokenizer returns (num_choices, seq_len), so add a batch axis of 1.
+        encodings = {k: v.unsqueeze(0).to(device) for k, v in encodings.items()}
+        with torch.no_grad():
+            outputs = model(**encodings)
+            logits = outputs.logits
+            probabilities = torch.softmax(logits, dim=1)[0].tolist()
+            predicted_class = torch.argmax(logits, dim=1).item()
+        return {
+            "predicted_option": request.options[predicted_class],
+            "option_index": predicted_class,
+            "confidence": probabilities[predicted_class],
+            "probabilities": {
+                f"option_{i}": prob for i, prob in enumerate(probabilities)
+            }
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.get("/health")
+async def health_check():
+    """Report liveness and whether the model and dataset globals are set."""
+    return {
+        "status": "healthy",
+        "model_loaded": model is not None,
+        "dataset_loaded": dataset is not None
+    }
+# Mount Gradio app to FastAPI
+# Keep this after every route above. A mount at "/" matches all paths and
+# routes are tried in registration order, so any route registered after the
+# mount would be shadowed by the Gradio app and never reached.
+app = gr.mount_gradio_app(app, demo, path="/")