import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
class SmolLM:
    def __init__(self, model_path="HuggingFaceTB/SmolLM2-1.7B-Instruct"):
        self.available = True
        try:
            print(f"[INFO] Loading tokenizer from {model_path}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            print(f"[INFO] Loading model from {model_path}")
            self.model = AutoModelForCausalLM.from_pretrained(model_path)
            print("[INFO] Model loaded successfully")
        except Exception as e:
            print(f"[ERROR] Failed to load model '{model_path}': {e}")
            self.available = False
    def predict(self, prompt):
        if not self.available:
            print("[WARN] SmolLM model unavailable, returning default weight 0.5")
            return "0.5"
        try:
            print(f"[INFO] Generating response for prompt: {prompt[:100]}...", flush=True)
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            # Use max_new_tokens so the generation budget is independent of prompt
            # length; a fixed max_length=150 would fail for prompts over 150 tokens.
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    num_return_sequences=1,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Decode only the newly generated tokens, not the echoed prompt.
            new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print(f"[INFO] Generated response: {response[:100]}...", flush=True)
            return response
        except Exception as e:
            print(f"[ERROR] SmolLM inference failed: {e}")
            return "0.5"