Futuresony committed (verified)
Commit ed0ccfa · 1 Parent(s): c3a8689

Update app.py

Files changed (1): app.py (+52 -1)
app.py CHANGED
@@ -5,7 +5,58 @@ import torch
 
 # Use a CPU-compatible base model (replace this with your actual full-precision model)
 base_model_id = "unsloth/gemma-2b"  # Replace with real CPU-compatible model
-lora_model_id = "Futuresony/CCM-AI"
+lora_model_id = "import gradio as gr
+from huggingface_hub import InferenceClient
+import os
+
+# 🔹 Hugging Face Credentials
+HF_REPO = "Futuresony/gemma2-9b-lora-alpaca"
+HF_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+
+client = InferenceClient(HF_REPO, token=HF_TOKEN)
+
+def format_alpaca_prompt(user_input, system_prompt, history):
+    """Formats input in Alpaca/LLaMA style"""
+    history_str = "\n".join([f"### Instruction:\n{h[0]}\n### Response:\n{h[1]}" for h in history])
+    prompt = f"""{system_prompt}
+{history_str}
+
+### Instruction:
+{user_input}
+
+### Response:
+"""
+    return prompt
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    formatted_prompt = format_alpaca_prompt(message, system_message, history)
+
+    response = client.text_generation(
+        formatted_prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    )
+
+    # ✅ Extract only the response
+    cleaned_response = response.split("### Response:")[-1].strip()
+
+    history.append((message, cleaned_response))  # ✅ Update history with the new message and response
+
+    yield cleaned_response  # ✅ Output only the answer
+
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=250, value=128, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.9, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.99, step=0.01, label="Top-p (nucleus sampling)"),
+    ],
+)
+
+if __name__ == "__main__":
+    demo.launch()"
 
 # Load the base model on CPU
 base_model = AutoModelForCausalLM.from_pretrained(
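
Note on the added block: as committed, all 52 new lines live inside one double-quoted string assigned to lora_model_id (the opening quote is on the first added line and the closing quote follows demo.launch()), so this app.py would fail with a SyntaxError, and the base-model loading lines kept below the hunk still belong to the old torch/LoRA path. A minimal sketch of what the addition presumably intends, with HF_REPO, HF_TOKEN, format_alpaca_prompt, and respond taken from the diff and only the string wrapper removed; the int() cast on the max-tokens value is the one line not in the commit:

# Hedged reconstruction of the intended app.py: the code the commit embeds in the
# lora_model_id string, unwrapped so it runs as a standalone Gradio app.
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face credentials (names taken from the diff)
HF_REPO = "Futuresony/gemma2-9b-lora-alpaca"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Remote text-generation client for the fine-tuned repo
client = InferenceClient(HF_REPO, token=HF_TOKEN)


def format_alpaca_prompt(user_input, system_prompt, history):
    """Format the conversation in Alpaca/LLaMA instruction style."""
    history_str = "\n".join(
        f"### Instruction:\n{user}\n### Response:\n{answer}" for user, answer in history
    )
    return (
        f"{system_prompt}\n"
        f"{history_str}\n\n"
        f"### Instruction:\n{user_input}\n\n"
        f"### Response:\n"
    )


def respond(message, history, system_message, max_tokens, temperature, top_p):
    prompt = format_alpaca_prompt(message, system_message, history)
    response = client.text_generation(
        prompt,
        max_new_tokens=int(max_tokens),  # cast the slider value to int (not in the commit)
        temperature=temperature,
        top_p=top_p,
    )
    # Keep only the text after the final "### Response:" marker
    cleaned_response = response.split("### Response:")[-1].strip()
    history.append((message, cleaned_response))  # mirrors the diff; ChatInterface also tracks history itself
    yield cleaned_response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=250, value=128, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.9, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.99, step=0.01, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()

With generation routed through InferenceClient, the Space presumably no longer loads gemma weights locally, so the AutoModelForCausalLM / LoRA loading code that remains below the hunk would become dead code if this sketch were the whole file.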