# Spaces: Sleeping
import os

import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
st.title("Tokenizer Test Space")

# Checkpoint under test. Start with the official model; once that loads,
# point this at the fine-tuned repo instead:
#   model_id = "Rahul-8799/project_manager_gemma3"
model_id = "google/gemma-2b-it"

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable rather than passing the literal string "HF_TOKEN",
# which is a variable name, not a credential.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    # Without a token only public, ungated repositories can be downloaded.
    st.warning(
        "HF_TOKEN is not set; skipping Hub login. "
        "Gated or private models will fail to download."
    )
# Step 1: load the tokenizer for `model_id` and report the outcome on the page.
st.write(f"Attempting to load tokenizer for {model_id}...")
try:
    # Only the load itself is guarded, so a failure while rendering the
    # result is not misreported as a tokenizer-loading error.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
except Exception as e:
    # This is a diagnostic page: show the failure instead of aborting the
    # script, so the model-loading step below still runs.
    st.error(f"Error loading tokenizer: {e}")
    st.exception(e)  # Show full traceback
else:
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
# Step 2: load the causal-LM weights for `model_id` with 4-bit quantization
# and report the outcome on the page.
st.write(f"Attempting to load model for {model_id}...")
try:
    # Imported here so that a failure to import the quantization config is
    # reported on the page like any other model-loading error.
    from transformers import BitsAndBytesConfig

    # 4-bit NF4 weights, bfloat16 compute, double quantization disabled.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        # NOTE(review): trust_remote_code=True allows the model repository to
        # execute its own Python code in this process, which also holds the
        # Hub token. Confirm it is really needed before pointing `model_id`
        # at a repository you do not control.
        trust_remote_code=True,
    )
except Exception as e:
    # Diagnostic page: surface the failure and its traceback to the user.
    st.error(f"Error loading model: {e}")
    st.exception(e)
else:
    # Success path kept outside the `try` so a rendering problem is not
    # mislabelled as a model-loading error.
    st.success("Model loaded successfully!")
    st.write("Model details:", model)