from transformers import AutoTokenizer, AutoModelForCausalLM
import streamlit as st
import torch
import os


# Hugging Face Hub repository exercised by this page. Start with the official
# Gemma checkpoint; once that loads cleanly, switch the value to the
# fine-tuned repository "Rahul-8799/project_manager_gemma3".
model_id = "google/gemma-2b-it"

st.title("Tokenizer Test Space")

# Tokenizer smoke test: announce the attempt, load the tokenizer for
# `model_id`, and render either its details or the failure in the page.
try:
    tokenizer_status = f"Attempting to load tokenizer for {model_id}..."
    st.write(tokenizer_status)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as tokenizer_error:
    # Top-level boundary of a diagnostic page: show a one-line summary first,
    # then the full traceback, instead of letting the script abort.
    tokenizer_failure = f"Error loading tokenizer: {tokenizer_error}"
    st.error(tokenizer_failure)
    st.exception(tokenizer_error)

@st.cache_resource
def _load_quantized_model(repo_id, _quantization_config):
    """Load the causal LM for *repo_id* once per server process.

    Streamlit re-executes the whole script on every rerun and for every new
    session; without caching, each execution would load the full model again.
    ``st.cache_resource`` keeps a single shared instance keyed on ``repo_id``.

    Args:
        repo_id: Hugging Face Hub repository id of the model to load.
        _quantization_config: ``BitsAndBytesConfig`` passed through to
            ``from_pretrained``. The leading underscore tells Streamlit not
            to hash this argument when computing the cache key.

    Returns:
        The model object returned by ``AutoModelForCausalLM.from_pretrained``.

    Raises:
        Exception: whatever ``from_pretrained`` raises; failures are not
            cached, so the next run retries the load.
    """
    # NOTE(security): `trust_remote_code=True` was dropped on purpose. It
    # allows Python code shipped inside the Hub repository to run locally,
    # and Gemma is implemented natively in transformers. If a repository
    # really needs custom code, the resulting error is shown in the page.
    return AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=_quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
    )


# Model smoke test: build the 4-bit quantization settings, load the model for
# `model_id`, and render either its details or the failure in the page.
try:
    st.write(f"Attempting to load model for {model_id}...")
    # Imported here so that a missing/old dependency is reported in the page
    # by the handler below rather than crashing the script at import time.
    from transformers import BitsAndBytesConfig

    # 4-bit NF4 weights with bfloat16 compute, no nested (double) quantization.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    model = _load_quantized_model(model_id, quantization_config)
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as e:
    # Top-level boundary of a diagnostic page: show a one-line summary first,
    # then the full traceback.
    st.error(f"Error loading model: {e}")
    st.exception(e)