Spaces:

s12144251
/

phi4-medqa-demo

Runtime error

App Files Community

s12144251 committed on May 5

Commit

9678f07

verified ·

1 Parent(s): 6d02fb4

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -30

app.py CHANGED Viewed

@@ -1,61 +1,68 @@
-# Install required libraries
-pip install unsloth peft bitsandbytes accelerate transformers
 import subprocess
 import sys
-subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])
-# Import necessary modules
 from transformers import AutoTokenizer
 from unsloth import FastLanguageModel
-# Define the MedQA prompt
-medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
-### Question:
 {}
-### Answer:
 """
-# Load the model and tokenizer using unsloth
-model_name = "Vijayendra/Phi4-MedQA"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=2048,
-    dtype=None,  # Use default precision
-    load_in_4bit=True,  # Enable 4-bit quantization
-    device_map="auto"  # Automatically map model to available devices
 )
-# Enable faster inference
 FastLanguageModel.for_inference(model)
-# Prepare the medical question
-medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
 inputs = tokenizer(
     [medqa_prompt.format(medical_question)],
     return_tensors="pt",
     padding=True,
     truncation=True,
     max_length=1024
-).to("cuda")  # Ensure inputs are on the GPU
-# Generate the output
 outputs = model.generate(
     **inputs,
-    max_new_tokens=512,  # Allow for detailed responses
-    use_cache=True  # Speeds up generation
 )
-# Decode and clean the response
 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Extract and print the generated answer
-answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
-print(f"Question: {medical_question}")
-print(f"Answer: {answer_text}")

 import subprocess
 import sys
+# تثبيت الحزم المطلوبة
+try:
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])
+except subprocess.CalledProcessError as e:
+    print("فشل في تثبيت الحزم:", e)
+    sys.exit(1)
+# استيراد المكتبات
 from transformers import AutoTokenizer
 from unsloth import FastLanguageModel
+import torch
+# قالب السؤال والإجابة
+medqa_prompt = """أنت نظام للإجابة على الأسئلة الطبية. أجب عن السؤال التالي بشكل واضح ومفصل وبجمل كاملة.
+### السؤال:
 {}
+### الإجابة:
 """
+# تحميل النموذج والمحول tokenizer
+model_name = "Vijayendra/Phi4-MedQA"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=2048,
+    dtype=None,         # الدقة الافتراضية
+    load_in_4bit=True,  # تحميل بخفة 4 بت
+    device_map="auto"   # يوزع النموذج تلقائياً على الأجهزة المتوفرة (مثل GPU)
 )
+# تفعيل وضع الاستدلال السريع
 FastLanguageModel.for_inference(model)
+# السؤال الطبي
+medical_question = "ما هي الأعراض الشائعة لمرض السكري؟"
+# تجهيز الإدخال
 inputs = tokenizer(
     [medqa_prompt.format(medical_question)],
     return_tensors="pt",
     padding=True,
     truncation=True,
     max_length=1024
+)
+# التأكد من إرسال الإدخال إلى الـ GPU إذا كان متوفر
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+inputs = {k: v.to(device) for k, v in inputs.items()}
+# توليد الإجابة
 outputs = model.generate(
     **inputs,
+    max_new_tokens=512,  # عدد الكلمات الجديدة المسموح بها في الإجابة
+    use_cache=True
 )
+# فك ترميز الإجابة من التوكنز إلى نص
 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+# استخراج فقط نص الإجابة
+answer_text = response.split("### الإجابة:")[1].strip() if "### الإجابة:" in response else response.strip()
+# طباعة النتيجة
+print(f"\nالسؤال: {medical_question}")
+print(f"الإجابة: {answer_text}")