import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
# Hugging Face Token from Space Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")
# Model IDs
BASE_MODEL = "google/gemma-3-1b-it"
LORA_ADAPTER = "markredito/gemma-pip-finetuned-v2" # πŸ” Replace with your actual LoRA repo
# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Quantization config for 4-bit (recommended on T4 GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
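# Note: 4-bit bitsandbytes loading generally requires a CUDA GPU. On a CPU-only
# Space you would likely drop quantization_config and load the model in full precision.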
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    token=HF_TOKEN,
    attn_implementation="eager",  # required for Gemma 3 with quantization
)
model = PeftModel.from_pretrained(model, LORA_ADAPTER, token=HF_TOKEN)
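# The LoRA adapter is applied as a PEFT wrapper at inference time; merging it
# into the base weights (e.g. with merge_and_unload()) is generally not
# recommended on a 4-bit quantized base.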
# Pad token fallback
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
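# Padding settings only come into play for batched inputs; each request below
# tokenizes a single prompt.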
# Generation function
def generate_response(prompt, temperature, top_p, top_k):
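    # Build a prompt in Gemma's chat format: each message is wrapped in
    # <start_of_turn>/<end_of_turn> markers, and the trailing "model" turn
    # cues the model to respond.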
    formatted = (
        "<start_of_turn>user\n"
        f"{prompt.strip()}\n"
        "<end_of_turn>\n"
        "<start_of_turn>model\n"
    )
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (everything after the prompt)
    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    cleaned = decoded.split("<end_of_turn>")[0].replace("model\n", "").strip()
    return cleaned
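# Example call (illustrative values):
#   generate_response("What if gravity took a day off?", temperature=0.7, top_p=0.9, top_k=50)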
# Gradio UI
with gr.Blocks() as demo:
gr.Markdown("## ✨ Gemma Psychedelic Model Demo")
gr.Markdown("Use your imagination or try one of the examples below to explore poetic and philosophical responses.")
gr.Markdown("Note: this model intentionally hallucinates.")
examples = [
"Describe a world where clouds are solid and people walk on them",
"Contrast quantum realities phenomena from the perspective of a starship navigator, using a spiral into infinity.",
"Dream up futuristic phenomena from the perspective of a timeless oracle, using a fractal blooming in chaos.",
]
with gr.Row():
with gr.Column():
prompt_input = gr.Textbox(label="Enter your prompt", lines=4, placeholder="Try something like: What if gravity took a day off?")
gr.Examples(
examples=examples,
inputs=prompt_input,
label="Example Prompts"
)
temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p (nucleus sampling)")
top_k = gr.Slider(0, 100, step=1, value=50, label="Top-k")
submit = gr.Button("Generate")
with gr.Column():
output = gr.Textbox(label="Model Response", lines=10)
submit.click(fn=generate_response, inputs=[prompt_input, temperature, top_p, top_k], outputs=output)
demo.launch()