s12144251 committed
Commit b9ba0f4 · verified · 1 Parent(s): 3e24f35

Update app.py

Files changed (1)
  1. app.py +60 -54
app.py CHANGED
@@ -1,54 +1,60 @@
- # Install required libraries
- !pip install unsloth peft bitsandbytes accelerate transformers
-
- # Import necessary modules
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel
-
- # Define the MedQA prompt
- medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
-
- ### Question:
- {}
-
- ### Answer:
- """
-
- # Load the model and tokenizer using unsloth
- model_name = "Vijayendra/Phi4-MedQA"
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name=model_name,
-     max_seq_length=2048,
-     dtype=None,  # Use default precision
-     load_in_4bit=True,  # Enable 4-bit quantization
-     device_map="auto"  # Automatically map model to available devices
- )
-
- # Enable faster inference
- FastLanguageModel.for_inference(model)
-
- # Prepare the medical question
- medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
- inputs = tokenizer(
-     [medqa_prompt.format(medical_question)],
-     return_tensors="pt",
-     padding=True,
-     truncation=True,
-     max_length=1024
- ).to("cuda")  # Ensure inputs are on the GPU
-
- # Generate the output
- outputs = model.generate(
-     **inputs,
-     max_new_tokens=512,  # Allow for detailed responses
-     use_cache=True  # Speeds up generation
- )
-
- # Decode and clean the response
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # Extract and print the generated answer
- answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
-
- print(f"Question: {medical_question}")
- print(f"Answer: {answer_text}")
+ # Install required libraries
+ # pip install unsloth peft bitsandbytes accelerate transformers  # commented out: a bare shell command is not valid Python; the subprocess call below installs instead
+
+ import subprocess
+ import sys
+
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])
+
+
+ # Import necessary modules
+ from transformers import AutoTokenizer
+ from unsloth import FastLanguageModel
+
+ # Define the MedQA prompt
+ medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
+
+ ### Question:
+ {}
+
+ ### Answer:
+ """
+
+ # Load the model and tokenizer using unsloth
+ model_name = "Vijayendra/Phi4-MedQA"
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     dtype=None,  # Use default precision
+     load_in_4bit=True,  # Enable 4-bit quantization
+     device_map="auto"  # Automatically map model to available devices
+ )
+
+ # Enable faster inference
+ FastLanguageModel.for_inference(model)
+
+ # Prepare the medical question
+ medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
+ inputs = tokenizer(
+     [medqa_prompt.format(medical_question)],
+     return_tensors="pt",
+     padding=True,
+     truncation=True,
+     max_length=1024
+ ).to("cuda")  # Ensure inputs are on the GPU
+
+ # Generate the output
+ outputs = model.generate(
+     **inputs,
+     max_new_tokens=512,  # Allow for detailed responses
+     use_cache=True  # Speeds up generation
+ )
+
+ # Decode and clean the response
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Extract and print the generated answer
+ answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
+
+ print(f"Question: {medical_question}")
+ print(f"Answer: {answer_text}")