Spaces:

Rahul-8799
/

testing_gemma

Sleeping

File size: 1,759 Bytes

b14b9f9
 
a6f2d08
762beb8
2f7103c
762beb8
bbcce4f
 
762beb8
b14b9f9
 
 
 
 
 
762beb8
 
b14b9f9
 
762beb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b14b9f9
762beb8
 
 
 
 
b14b9f9
 
762beb8

import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import streamlit as st
import torch

st.title("Tokenizer Test Space")
# Hub repository whose tokenizer and model this page tries to load.
# model_id = "unsloth/gemma-3-1b-it" # Baseline: swap this in to check the official model first
model_id = "Rahul-8799/project_manager_gemma3" # Custom model; try it once the baseline loads

# Download cache location, passed as cache_dir to the from_pretrained calls
# below. /tmp is used because it is usually writable in Spaces.
cache_directory = "/tmp/hf_cache"

# Create the cache directory up front; exist_ok=True makes this a no-op when
# it is already there.
os.makedirs(cache_directory, exist_ok=True)

@st.cache_resource(show_spinner=False)
def _load_tokenizer(repo_id, cache_dir):
    """Load the tokenizer for *repo_id*, storing downloads under *cache_dir*.

    Streamlit re-executes this script on every rerun and for every new
    session. ``st.cache_resource`` keeps the returned tokenizer in the server
    process, keyed by the arguments, so later runs reuse it instead of
    loading it again. Exceptions propagate to the caller and are not cached,
    so a failed load is retried on the next run.
    """
    return AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)


try:
    st.write(f"Attempting to load tokenizer for {model_id}...")
    # Explicitly pass the cache directory so downloads land somewhere writable.
    tokenizer = _load_tokenizer(model_id, cache_directory)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as e:
    # Top-level UI boundary: surface any failure on the page rather than
    # letting it abort the script.
    st.error(f"Error loading tokenizer: {e}")
    st.exception(e)  # Show full traceback

@st.cache_resource(show_spinner=False)
def _load_model(repo_id, cache_dir, _quantization_config):
    """Load the causal-LM weights for *repo_id* with the given quantization.

    Streamlit re-executes this script on every rerun and for every new
    session. ``st.cache_resource`` keeps the loaded model in the server
    process so later runs reuse it instead of loading another copy.
    Exceptions propagate to the caller and are not cached, so a failed load
    is retried on the next run.

    Args:
        repo_id: Hub repository id of the model.
        cache_dir: Directory where downloaded files are stored.
        _quantization_config: ``BitsAndBytesConfig`` to load with. The leading
            underscore tells Streamlit not to hash this argument, so the
            cache key is ``(repo_id, cache_dir)`` only; restart the app after
            changing the quantization settings.
    """
    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to run locally. Kept to preserve behaviour; confirm
    # this repository actually needs it.
    return AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=_quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        cache_dir=cache_dir,
    )


try:
    st.write(f"Attempting to load model for {model_id}...")
    # 4-bit NF4 quantization with bfloat16 compute, no double quantization.
    # NOTE(review): 4-bit bitsandbytes loading typically expects a CUDA GPU;
    # confirm the hardware this Space runs on.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    # Explicitly pass the cache directory so downloads land somewhere writable.
    model = _load_model(model_id, cache_directory, quantization_config)
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as e:
    # Top-level UI boundary: surface any failure on the page rather than
    # letting it abort the script.
    st.error(f"Error loading model: {e}")
    st.exception(e)  # Show full traceback