testing_gemma/src/streamlit_app.py
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import streamlit as st
import torch
st.title("Tokenizer Test Space")
# model_id = "unsloth/gemma-3-1b-it" # Test with the official model first
model_id = "Rahul-8799/project_manager_gemma3" # If the official model works, try yours
# Define a writable directory for the cache. /tmp is usually writable in Spaces.
cache_directory = "/tmp/hf_cache"
# Ensure the cache directory exists (good practice, though hf_hub might handle it)
os.makedirs(cache_directory, exist_ok=True)
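# Optional (assumption, not in the original script): also point HF_HOME at the
# writable directory so any Hugging Face library that ignores cache_dir still
# caches under /tmp instead of the read-only default home directory.
os.environ.setdefault("HF_HOME", cache_directory)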
try:
    st.write(f"Attempting to load tokenizer for {model_id}...")
    # Explicitly pass the cache_dir
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_directory)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as e:
    st.error(f"Error loading tokenizer: {e}")
    st.exception(e)  # Show full traceback
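# Quick round-trip check (a minimal sketch added for illustration; the sample
# sentence is arbitrary, and `tokenizer` only exists if the try block above
# succeeded).
if "tokenizer" in locals():
    sample_text = "Hello from the tokenizer test."
    token_ids = tokenizer(sample_text, return_tensors="pt")["input_ids"]
    st.write("Sample text:", sample_text)
    st.write("Token IDs:", token_ids.tolist())
    st.write("Decoded back:", tokenizer.decode(token_ids[0], skip_special_tokens=True))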
try:
    st.write(f"Attempting to load model for {model_id}...")
    # Assuming you want 4-bit quantization for Gemma
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    # Explicitly pass the cache_dir
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        cache_dir=cache_directory,  # Use the writable cache directory
    )
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.exception(e)
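# Optional end-to-end check (a sketch, not part of the original test): run a
# short generation if both the tokenizer and model loaded above. The prompt
# and generation settings are placeholder assumptions.
if "tokenizer" in locals() and "model" in locals():
    try:
        st.write("Running a short test generation...")
        prompt = "Write a one-sentence project status update."
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output_ids = model.generate(**inputs, max_new_tokens=50)
        st.write("Generated text:", tokenizer.decode(output_ids[0], skip_special_tokens=True))
    except Exception as e:
        st.error(f"Error during test generation: {e}")
        st.exception(e)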