"""Streamlit smoke test: load a Hugging Face tokenizer and 4-bit model.

The page reports, step by step, whether the tokenizer and the quantized
causal-LM for ``model_id`` can be loaded, and shows the full traceback in the
UI when a step fails.
"""

import os

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# model_id = "unsloth/gemma-3-1b-it"  # Test with the official model first
model_id = "Rahul-8799/project_manager_gemma3"  # If the official model works, try yours

# Define a writable directory for the cache. /tmp is usually writable in Spaces.
cache_directory = "/tmp/hf_cache"


# Streamlit re-executes this script on every rerun and for each new session.
# st.cache_resource keeps the loaded objects in memory so the expensive load
# happens once per process. Exceptions are not cached, so a failed load is
# retried on the next run. show_spinner=False keeps the page output limited to
# the explicit status messages below.
@st.cache_resource(show_spinner=False)
def _load_tokenizer(repo_id: str, cache_dir: str):
    """Return the tokenizer for *repo_id*, storing downloads under *cache_dir*."""
    return AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)


@st.cache_resource(show_spinner=False)
def _load_model(repo_id: str, cache_dir: str):
    """Return the 4-bit quantized causal-LM for *repo_id*.

    Weights are requested as NF4 4-bit with bfloat16 compute and without
    double quantization; downloads are stored under *cache_dir*.
    """
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    return AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository. Confirm this checkpoint actually needs it.
        trust_remote_code=True,
        cache_dir=cache_dir,
    )


st.title("Tokenizer Test Space")

# Ensure the cache directory exists (good practice, though hf_hub might handle it)
os.makedirs(cache_directory, exist_ok=True)

# Each step is reported independently: the model load is still attempted when
# the tokenizer load fails, so both results are visible on one page.
try:
    st.write(f"Attempting to load tokenizer for {model_id}...")
    tokenizer = _load_tokenizer(model_id, cache_directory)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as e:  # top-level boundary: surface any failure in the UI
    st.error(f"Error loading tokenizer: {e}")
    st.exception(e)  # Show full traceback

try:
    st.write(f"Attempting to load model for {model_id}...")
    model = _load_model(model_id, cache_directory)
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as e:  # top-level boundary: surface any failure in the UI
    st.error(f"Error loading model: {e}")
    st.exception(e)