# Hugging Face Spaces app (Space status banner at capture time: "Sleeping" —
# kept as a comment so the file remains valid Python).
import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Read your Hugging Face token from Space Secrets (required for gated models
# such as the Gemma family).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hugging Face model identifiers.
BASE_MODEL = "google/gemma-3-1b-it"
LORA_ADAPTER = "your-username/your-lora-repo"  # TODO: replace with your LoRA adapter repo

# Load the tokenizer with the token (required for gated models).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)

# Pick device/dtype: bfloat16 on GPU, float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto" if device == "cuda" else None,
    torch_dtype=dtype,
    token=HF_TOKEN,
)

# Attach the LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(
    model,
    LORA_ADAPTER,
    token=HF_TOKEN,
)

# Pad-token fallback: some tokenizers ship without a dedicated pad token,
# which model.generate needs for pad_token_id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): original flat-pasted source is ambiguous on whether this line
# was inside the `if`; setting padding_side unconditionally is safe here.
tokenizer.padding_side = "right"
def generate_response(user_input):
    """Generate a reply for *user_input* using the Gemma chat turn format.

    Args:
        user_input: Raw prompt text from the Gradio textbox.

    Returns:
        The decoded model response with chat-template markers stripped.
    """
    # Gemma instruction-tuned chat template: one user turn, then open a
    # model turn for the continuation.
    prompt = (
        "<start_of_turn>user\n"
        f"{user_input.strip()}\n"
        "<end_of_turn>\n"
        "<start_of_turn>model\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (slice off the prompt portion).
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    # Trim anything after the turn terminator and a leaked "model" prefix.
    response = response.split("<end_of_turn>")[0].replace("model\n", "").strip()
    return response
# Gradio UI | |
gr.Interface( | |
fn=generate_response, | |
inputs=gr.Textbox(label="Enter your prompt", placeholder="E.g. Describe a universe made of sound..."), | |
outputs=gr.Textbox(label="Model's response"), | |
title="Gemma LoRA: Abstract Thought Generator", | |
description="LoRA fine-tuned `gemma-3-1b-it` on poetic/philosophical prompts. Run your own abstract experiments.", | |
theme="soft" | |
).launch() | |