Spaces:
Sleeping
Sleeping
File size: 1,759 Bytes
b14b9f9 a6f2d08 762beb8 2f7103c 762beb8 bbcce4f 762beb8 b14b9f9 762beb8 b14b9f9 762beb8 b14b9f9 762beb8 b14b9f9 762beb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import streamlit as st
import torch
# Hub repository whose tokenizer and model this page tries to load. To check
# against the official model first, point this at "unsloth/gemma-3-1b-it".
model_id = "Rahul-8799/project_manager_gemma3"

# Downloads are cached under /tmp, which is usually writable in Spaces.
cache_directory = "/tmp/hf_cache"

st.title("Tokenizer Test Space")

# Create the cache directory up front; an already-existing one is fine.
os.makedirs(cache_directory, exist_ok=True)
# Step 1: load the tokenizer for `model_id` and report the outcome in the UI.
st.write(f"Attempting to load tokenizer for {model_id}...")
try:
    # Only the load call is guarded, so anything reported below as
    # "Error loading tokenizer" really was raised while loading it.
    # cache_dir is passed explicitly so files land in the writable cache.
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_directory)
except Exception as e:
    # Broad catch is deliberate: this page is a diagnostic, so every failure
    # is shown to the user instead of aborting the script.
    st.error(f"Error loading tokenizer: {e}")
    st.exception(e)  # Show full traceback
else:
    # Success path kept out of the try body so that rendering problems are
    # not misreported as tokenizer-loading errors.
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
@st.cache_resource
def _load_quantized_model(repo_id, cache_dir):
    """Load the causal LM for *repo_id* with 4-bit NF4 quantization.

    Decorated with ``st.cache_resource`` so the model is built once and
    reused, instead of being reloaded every time Streamlit reruns the script.

    Args:
        repo_id: Model identifier passed to ``from_pretrained``.
        cache_dir: Directory where downloaded files are cached.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``.

    Raises:
        Exception: Whatever ``from_pretrained`` raises is propagated to the
            caller unchanged.
    """
    # NOTE(review): bitsandbytes 4-bit loading presumably needs hardware the
    # library supports (typically a CUDA GPU) -- confirm for this Space.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    return AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        # NOTE(review): trust_remote_code=True allows code shipped in the
        # model repository to run locally; keep it only if the repository is
        # trusted and actually needs custom code.
        trust_remote_code=True,
        cache_dir=cache_dir,
    )


# Step 2: load the model for `model_id` and report the outcome in the UI.
st.write(f"Attempting to load model for {model_id}...")
try:
    # Only the load call is guarded, so anything reported below as
    # "Error loading model" really was raised while loading it.
    model = _load_quantized_model(model_id, cache_directory)
except Exception as e:
    # Broad catch is deliberate: this page is a diagnostic, so every failure
    # is shown to the user instead of aborting the script.
    st.error(f"Error loading model: {e}")
    st.exception(e)
else:
    st.success("Model loaded successfully!")
    st.write("Model details:", model)