File size: 823 Bytes
d75b820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Load IBM Granite 3.2 8B Instruct from a GGUF-quantized checkpoint.

Loading GGUF files through transformers additionally requires the
``gguf`` package (``pip install gguf``); the quantized weights are
dequantized into torch tensors at load time.
"""
from transformers import AutoModel


# Hugging Face repo that hosts the GGUF quantizations of Granite 3.2 8B Instruct.
base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
# Specific quantized weight file inside the repo (4-bit K-quant, medium variant).
GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"

# quantization_config=None is the default; kept explicit to signal that no
# additional (e.g. bitsandbytes) quantization is layered on top of the GGUF
# dequantization. device_map/torch_dtype "auto" let accelerate/transformers
# pick placement and dtype.
model = AutoModel.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype="auto",
    quantization_config=None,
    gguf_file=GGUF_MODEL,
)
# A bare `model.config` expression only displays in a notebook; print it so the
# config is also visible when this file runs as a plain script.
print(model.config)
"""
# pip install gguf
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
filename = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"

torch_dtype = torch.float32 # could be torch.float16 or torch.bfloat16 too
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)

"""