Nastarang committed
Commit 2fc5937 · 1 Parent(s): 0f5fea2

requirement file

Files changed (1)
  1. app.py +11 -6
app.py CHANGED
@@ -3,17 +3,17 @@ from transformers import AutoTokenizer
  from auto_gptq import AutoGPTQForCausalLM
  import gradio as gr
 
- checkpoint = "cortecs/Meta-Llama-3-8B-Instruct-GPTQ-8b"
+ checkpoint = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
 
  # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True)
 
- # Load GPTQ model
-
+ # Load GPTQ model correctly
  model = AutoGPTQForCausalLM.from_quantized(
      checkpoint,
      device="cuda:0" if torch.cuda.is_available() else "cpu",
-     torch_dtype=torch.float16,
+     torch_dtype=torch.float32,
+     trust_remote_code=True
  )
 
  # Function to format prompt + generate response
@@ -24,7 +24,7 @@ def predict(message, history):
      outputs = model.generate(
          **inputs,
          do_sample=True,
-         temperature=0.6,
+         temperature=0.7,
          top_p=0.9,
          max_new_tokens=256,
          eos_token_id=tokenizer.eos_token_id
@@ -35,4 +35,9 @@ def predict(message, history):
      return response
 
  # Launch Gradio chatbot
- gr.ChatInterface(predict, title=" LLaMA 3 Chatbot").launch(debug=True)
+ gr.ChatInterface(predict).launch(debug=True)
+
+
+ demo.launch()
+
+
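The hunks above omit the middle of predict (original lines 20-23, where inputs is built, and 31-34, where response is decoded). Purely to illustrate how the visible generate() call is used, here is a minimal, hypothetical version of that function; the chat-template prompt construction and the decode step are assumptions, not code from this commit.

def predict(message, history):
    # Hypothetical prompt construction (not shown in this commit's hunks):
    # wrap the latest user message with the tokenizer's chat template.
    messages = [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Visible in the diff: sampled generation with the updated temperature.
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        max_new_tokens=256,
        eos_token_id=tokenizer.eos_token_id
    )

    # Hypothetical decode step (also outside the hunks): drop the prompt
    # tokens and return only the newly generated text.
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response

Note that, as committed, the file ends with both gr.ChatInterface(predict).launch(debug=True) and a bare demo.launch(); no demo object is defined in the lines shown, so that final call would raise a NameError once the blocking launch() returns.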