Spaces:

NouRed
/

BioMed-LLaMa-3

Build error

NouRed committed on May 29, 2024

Commit

d32201f

verified ·

1 Parent(s): ca2100e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,51 +36,44 @@ def generate_prompt(instruction, input=None):
 ### Response:
 """
-@spaces.GPU
-def models():
-    based_model_path = "meta-llama/Meta-Llama-3-8B"
-    lora_weights = "NouRed/BioMed-Tuned-Llama-3-8b"
-    load_in_4bit=True
-    bnb_4bit_use_double_quant=True
-    bnb_4bit_quant_type="nf4"
-    bnb_4bit_compute_dtype=torch.bfloat16
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    tokenizer = AutoTokenizer.from_pretrained(
-        based_model_path,
-        )
-    tokenizer.padding_side = 'right'
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.add_eos_token = True
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=load_in_4bit,
-        bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
-        bnb_4bit_quant_type=bnb_4bit_quant_type,
-        bnb_4bit_compute_dtype=bnb_4bit_compute_dtype
     )
-    base_model = AutoModelForCausalLM.from_pretrained(
-        based_model_path,
-        device_map="auto",
-        attn_implementation="flash_attention_2", # I have an A100 GPU with 40GB of RAM 😎
-        quantization_config=quantization_config,
-        )
-    model = PeftModel.from_pretrained(
-        base_model,
-        lora_weights,
-        torch_dtype=torch.float16,
-        )
-    return model, tokenizer
-model, tokenizer = models()
 @spaces.GPU

 ### Response:
 """
+based_model_path = "meta-llama/Meta-Llama-3-8B"
+lora_weights = "NouRed/BioMed-Tuned-Llama-3-8b"
+load_in_4bit=True
+bnb_4bit_use_double_quant=True
+bnb_4bit_quant_type="nf4"
+bnb_4bit_compute_dtype=torch.bfloat16
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained(
+    based_model_path,
     )
+tokenizer.padding_side = 'right'
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.add_eos_token = True
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=load_in_4bit,
+    bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
+    bnb_4bit_quant_type=bnb_4bit_quant_type,
+    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype
+)
+base_model = AutoModelForCausalLM.from_pretrained(
+    based_model_path,
+    device_map="auto",
+    attn_implementation="flash_attention_2", # I have an A100 GPU with 40GB of RAM 😎
+    quantization_config=quantization_config,
+    )
+model = PeftModel.from_pretrained(
+    base_model,
+    lora_weights,
+    torch_dtype=torch.float16,
+    )
 @spaces.GPU