Spaces:

NouRed
/

BioMed-LLaMa-3

Build error

App Files Files Community

NouRed commited on Apr 22, 2024

Commit

e7209b2

verified ·

1 Parent(s): ca382da

Create app.py

Browse files

Files changed (1) hide show

app.py +165 -0

app.py ADDED Viewed

	@@ -0,0 +1,165 @@

+# -*- coding: utf-8 -*- Nour Eddine Zekaoui et al.
+import torch
+import gradio as gr
+from peft import PeftModel
+from transformers import (
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    AutoModelForCausalLM)
+def generate_prompt(instruction, input=None):
+    if input:
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.  # noqa: E501
+### Instruction:
+{instruction}
+### Input:
+{input}
+### Response:
+"""
+    else:
+        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.  # noqa: E501
+### Instruction:
+{instruction}
+### Response:
+"""
+based_model_path = "meta-llama/Meta-Llama-3-8B"
+lora_weights = "NouRed/BioMed-Tuned-Llama-3-8b"
+load_in_4bit=True
+bnb_4bit_use_double_quant=True
+bnb_4bit_quant_type="nf4"
+bnb_4bit_compute_dtype=torch.bfloat16
+device = torch.device("cuda" if torch.cuda.is_available() else "CPU")
+tokenizer = AutoTokenizer.from_pretrained(
+    based_model_path,
+    )
+tokenizer.padding_side = 'right'
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.add_eos_token = True
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=load_in_4bit,
+    bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
+    bnb_4bit_quant_type=bnb_4bit_quant_type,
+    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype
+)
+base_model = AutoModelForCausalLM.from_pretrained(
+    based_model_path,
+    device_map="auto",
+    attn_implementation="flash_attention_2", # I have an A100 GPU with 40GB of RAM 😎
+    quantization_config=quantization_config,
+    )
+model = PeftModel.from_pretrained(
+    base_model,
+    lora_weights,
+    torch_dtype=torch.float16,
+    )
+def generate(
+    instruction,
+    input=None,
+    temperature=0.1,
+    top_p=0.9,
+    top_k=40,
+    num_beams=4,
+    max_new_tokens=128,
+    do_sample=True,
+    **kwargs):
+  prompt = generate_prompt(instruction, input)
+  inputs = tokenizer(prompt, return_tensors="pt").to(device)
+  with torch.no_grad():
+    generated_ids = model.generate(
+      **inputs,
+      top_p=top_p,
+      top_k=top_k,
+      do_sample=do_sample,
+      max_new_tokens=max_new_tokens,
+    )
+  output = tokenizer.decode(
+      generated_ids[0],
+      skip_special_tokens=True,
+      clean_up_tokenization_spaces=True
+      )
+  response = output.split("### Response:")[1].strip()
+  return response
+description = """
+<div style="justify-content: center; text-align: center;">
+    <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+    <h2>
+        <p> BioMed-LLaMa-3: Effecient Intruction Fine-Tuning in Biomedical Language</p>
+    </h2>
+    </div>
+    <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+        <a href="https://huggingface.co/NouRed/BioMed-Tuned-Llama-3-8b" target="_blank"><img src="https://img.shields.io/badge/🤗_Hugging_Face-BioMedLLaMa3-orange" alt="HF HUB"></a> &nbsp;&nbsp;
+        <a href="https://colab.research.google.com/drive/1PDa8b5TqpAYxDVlF0Elv32KOM2kFaXJh" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Inference Notebook"></a>
+    </div>
+</div>
+"""
+gr.Interface(
+    fn=generate,
+    inputs=[
+        gr.components.Textbox(
+            lines=2,
+            label="Instruction",
+            placeholder="Tell me about Covid-19?",
+        ),
+        gr.components.Textbox(lines=2, label="Input", placeholder="none"),
+        gr.components.Slider(
+            minimum=0, maximum=1, value=0.1, label="Temperature"
+        ),
+        gr.components.Slider(
+            minimum=0, maximum=1, value=0.9, label="Top p"
+        ),
+        gr.components.Slider(
+            minimum=0, maximum=100, step=1, value=40, label="Top k"
+        ),
+        gr.components.Slider(
+            minimum=1, maximum=4, step=1, value=4, label="Beams"
+        ),
+        gr.components.Slider(
+            minimum=1, maximum=2000, step=1, value=128, label="Max tokens"
+        ),
+        gr.components.Checkbox(
+            value=True, label="Do Sample", info="Do you want to use sampling during text generation?"
+        ),
+    ],
+    outputs=[
+        gr.components.Textbox(
+            lines=5,
+            label="Output",
+        )
+    ],
+    examples=[
+        ["Suggest treatment for pneumonia", "", 0.1, 0.9, 40, 4, 128, True],
+        ["I have a sore throat, slight cough, tiredness. should i get tested fro covid 19?", "", 0.1, 0.9, 40, 4, 128, True],
+        ["Husband of this patient asked me how to treat premature ejaculation and how to increase her libido.", "", 0.1, 0.9, 40, 4, 128, True],
+    ],
+    theme="soft",
+    description=description,  # noqa: E501
+).launch()