Spaces:
Sleeping
Sleeping
File size: 1,440 Bytes
762beb8 a6f2d08 762beb8 2f7103c 762beb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import os

import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
# Authenticate with the Hugging Face Hub before any download is attempted.
# The token is read from the HF_TOKEN environment variable (for example a
# secret configured for the deployment) instead of being written into the
# source: passing the literal string "HF_TOKEN" to login() is not a usable
# credential and keeps real tokens out of version control.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

st.title("Tokenizer Test Space")

if not hf_token:
    # Keep the app running so public models can still be tested, but make the
    # missing credential visible in the page.
    st.warning(
        "HF_TOKEN environment variable is not set; "
        "gated or private models may fail to download."
    )

model_id = "google/gemma-2b-it"  # Test with the official model first
# model_id = "Rahul-8799/project_manager_gemma3"  # If the official model works, try yours
# Step 1: try to load the tokenizer for ``model_id`` and report the outcome in
# the page. The status calls stay inside the ``try`` so that any failure,
# whatever its source, is rendered by the handler below instead of aborting
# the script.
try:
    tokenizer_status = f"Attempting to load tokenizer for {model_id}..."
    st.write(tokenizer_status)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as tokenizer_error:
    # Short message first, then the full traceback for debugging.
    st.error(f"Error loading tokenizer: {tokenizer_error}")
    st.exception(tokenizer_error)
# Step 2: try to load the causal-LM weights for ``model_id`` with 4-bit
# quantization and report the outcome in the page. As with the tokenizer
# step, everything runs inside the ``try`` so failures are displayed rather
# than raised.
try:
    st.write(f"Attempting to load model for {model_id}...")

    # Imported inside the ``try`` so an import failure is reported in the page
    # like any other loading error.
    from transformers import BitsAndBytesConfig

    # 4-bit NF4 weights, bfloat16 compute dtype, double quantization disabled.
    quantization_config = BitsAndBytesConfig(
        **{
            "load_in_4bit": True,
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_compute_dtype": torch.bfloat16,
            "bnb_4bit_use_double_quant": False,
        }
    )

    # NOTE(review): trust_remote_code=True permits custom model code shipped
    # in the repository to run on load - confirm this is actually needed for
    # the checkpoints being tested.
    model_load_options = {
        "quantization_config": quantization_config,
        "low_cpu_mem_usage": True,
        "torch_dtype": torch.bfloat16,
        "trust_remote_code": True,
    }
    model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)

    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as model_error:
    # Short message first, then the full traceback for debugging.
    st.error(f"Error loading model: {model_error}")
    st.exception(model_error)