Balaramkm committed on
Commit
5dfad25
·
verified ·
1 Parent(s): 3a29174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -17
app.py CHANGED
@@ -1,23 +1,21 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
 
3
 
4
- llm = Llama(
5
- model_path="deepseek-coder-6.7b.Q4_K_M.gguf",
6
- n_ctx=2048,
7
- n_threads=4
 
 
 
 
8
  )
9
 
10
  def chat(prompt):
11
- system_prompt = "You are a helpful coding assistant. Answer precisely."
12
- full_prompt = f"### Instruction:\n{prompt}\n### Response:\n"
13
- output = llm(full_prompt, max_tokens=1024)
14
- return output["choices"][0]["text"]
15
 
16
- gr.Interface(
17
- fn=chat,
18
- inputs="text",
19
- outputs="text",
20
- title="DeepSeek Coder 6.7B",
21
- description="Free ChatGPT-style coding assistant",
22
- theme="soft"
23
- ).launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
 
5
+ model_id = "deepseek-ai/deepseek-coder-6.7b-instruct"
6
+
7
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
8
+ model = AutoModelForCausalLM.from_pretrained(
9
+ model_id,
10
+ device_map="auto",
11
+ torch_dtype=torch.float16,
12
+ trust_remote_code=True
13
  )
14
 
15
  def chat(prompt):
16
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
17
+ outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True)
18
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
19
+ return response
20
 
21
+ gr.Interface(fn=chat, inputs="text", outputs="text", title="DeepSeek Coder 6.7B Chatbot").launch()