# Notebook export residue (823 bytes, rev d75b820) — converted to a comment so the file parses.
"""Load IBM Granite 3.2 8B Instruct from a pre-quantized GGUF file.

Requires ``pip install gguf`` in addition to ``transformers``.
The GGUF weights are dequantized into a regular torch model on load.
"""
from transformers import AutoModelForCausalLM

# Hugging Face repo holding the GGUF quantizations, and the specific
# Q4_K_M quantization file inside it.
base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"

# Use AutoModelForCausalLM, not AutoModel: an instruct model needs its
# LM head attached to be usable for generation. (quantization_config is
# left at its default of None — GGUF loading does not take one.)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype="auto",
    gguf_file=GGUF_MODEL,
)

# Show the resolved config (a bare expression only displays in a notebook).
print(model.config)