# Hugging Face Space app: Gradio demo serving a LoRA-adapted Gemma model.
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Read your Hugging Face token from Space Secrets.
# Gated repos (like google/gemma-*) reject anonymous downloads, so this
# must be set in the Space's secret settings for the loads below to work.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Hugging Face model identifiers
BASE_MODEL = "google/gemma-3-1b-it"
LORA_ADAPTER = "your-username/your-lora-repo" # 🔁 Replace this with your adapter repo
# Load base model with token (required for gated models)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
# Detect if GPU is available; bfloat16 only on CUDA, full fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL,
device_map="auto" if device == "cuda" else None,  # let accelerate place shards on GPU
torch_dtype=dtype,
token=HF_TOKEN
)
# Wrap the base model with the LoRA adapter weights (PEFT applies them in place).
model = PeftModel.from_pretrained(
model,
LORA_ADAPTER,
token=HF_TOKEN
)
# Pad token fallback: Gemma tokenizers may ship without a pad token, and
# generate() needs pad_token_id; reuse EOS so padding is a no-op semantically.
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
def generate_response(user_input):
    """Generate a model reply for one user prompt via the Gemma chat format.

    Args:
        user_input: Raw text from the Gradio textbox. May be None or empty
            when the box is cleared — handled gracefully instead of crashing.

    Returns:
        str: The decoded model continuation with Gemma turn markers stripped.
    """
    # Guard: Gradio can pass None or "" — the original `.strip()` on None
    # raised AttributeError and surfaced as an opaque UI error.
    if not user_input or not user_input.strip():
        return "Please enter a prompt."
    # Gemma instruction-tuned turn template: user turn, then an open model turn.
    prompt = (
        "<start_of_turn>user\n"
        f"{user_input.strip()}\n"
        "<end_of_turn>\n"
        "<start_of_turn>model\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # inference_mode: skip autograd bookkeeping during generation (saves memory).
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens — slice off the echoed prompt.
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    # Trim anything after the turn delimiter and drop a leaked role marker.
    response = response.split("<end_of_turn>")[0].replace("model\n", "").strip()
    return response
# Gradio UI: a single-textbox interface wired to the generator above.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(
        label="Enter your prompt",
        placeholder="E.g. Describe a universe made of sound...",
    ),
    outputs=gr.Textbox(label="Model's response"),
    title="Gemma LoRA: Abstract Thought Generator",
    description="LoRA fine-tuned `gemma-3-1b-it` on poetic/philosophical prompts. Run your own abstract experiments.",
    theme="soft",
)
demo.launch()