markredito committed on
Commit 5bd469b · verified · 1 Parent(s): ab4349c

fixed token reqs

Files changed (1)
1. app.py +38 -14
app.py CHANGED
@@ -1,18 +1,37 @@
+import os
 import torch
+import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
-import gradio as gr
 
-# Model identifiers
+# Read your Hugging Face token from Space Secrets
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+# Hugging Face model identifiers
 BASE_MODEL = "google/gemma-3-1b-it"
-LORA_ADAPTER = "markredito/gemma-pip-finetuned-v2" # replace this!
+LORA_ADAPTER = "your-username/your-lora-repo" # 🔁 Replace this with your adapter repo
+
+# Load base model with token (required for gated models)
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
+
+# Detect if GPU is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.bfloat16 if device == "cuda" else torch.float32
 
-# Load base + adapter
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=True)
-model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16, device_map="auto", token=True)
-model = PeftModel.from_pretrained(model, LORA_ADAPTER, token=True)
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    device_map="auto" if device == "cuda" else None,
+    torch_dtype=dtype,
+    token=HF_TOKEN
+)
 
-# Handle missing pad token
+model = PeftModel.from_pretrained(
+    model,
+    LORA_ADAPTER,
+    token=HF_TOKEN
+)
+
+# Pad token fallback
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = "right"
@@ -25,6 +44,7 @@ def generate_response(user_input):
         "<start_of_turn>model\n"
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
     outputs = model.generate(
         **inputs,
         max_new_tokens=200,
@@ -32,18 +52,22 @@
         temperature=0.7,
         top_p=0.9,
         top_k=50,
-        eos_token_id=tokenizer.eos_token_id,
         pad_token_id=tokenizer.pad_token_id,
+        eos_token_id=tokenizer.eos_token_id,
     )
+
+    # Decode and clean output
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
-    response = response.split("<end_of_turn>")[0].strip()
+    response = response.split("<end_of_turn>")[0].replace("model\n", "").strip()
+
     return response
 
 # Gradio UI
 gr.Interface(
     fn=generate_response,
-    inputs=gr.Textbox(label="Enter your prompt"),
-    outputs=gr.Textbox(label="Model response"),
-    title="Gemma LoRA: Philosophical Inference",
-    description="LoRA fine-tuned Gemma model generating poetic/abstract outputs."
+    inputs=gr.Textbox(label="Enter your prompt", placeholder="E.g. Describe a universe made of sound..."),
+    outputs=gr.Textbox(label="Model's response"),
+    title="Gemma LoRA: Abstract Thought Generator",
+    description="LoRA fine-tuned `gemma-3-1b-it` on poetic/philosophical prompts. Run your own abstract experiments.",
+    theme="soft"
 ).launch()
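
For the token change in this commit to work, the Space needs an HF_TOKEN secret, and the account behind it must have accepted the Gemma license, since google/gemma-3-1b-it is gated. Below is a minimal sketch of a startup check that could sit near the top of app.py; the helper name check_hf_token is hypothetical and not part of this commit.

import os

from huggingface_hub import whoami


def check_hf_token() -> str:
    """Hypothetical helper: fail fast if the HF_TOKEN secret is missing or invalid."""
    token = os.environ.get("HF_TOKEN")
    if token is None:
        raise RuntimeError(
            "HF_TOKEN is not set. Add it under the Space's Settings -> Variables and secrets, "
            "or export HF_TOKEN locally before running app.py."
        )
    # whoami() raises if the Hub rejects the token; note that a valid token alone is not
    # enough for gated repos -- the account must also have accepted the model license.
    user = whoami(token=token)
    print(f"Authenticated to the Hugging Face Hub as {user['name']}")
    return token

The returned token could then be passed to the from_pretrained calls exactly as the commit already does with HF_TOKEN.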