from transformers import AutoTokenizer, AutoModelForCausalLM


class SmolLM:
    """Thin wrapper around HuggingFaceTB/SmolLM2-1.7B-Instruct for text generation."""

    def __init__(self, model_path="HuggingFaceTB/SmolLM2-1.7B-Instruct"):
        self.available = True
        try:
            print(f"[INFO] Loading model tokenizer from {model_path}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            print(f"[INFO] Loading model from {model_path}")
            self.model = AutoModelForCausalLM.from_pretrained(model_path)
            print("[INFO] Model loaded successfully")
        except Exception as e:
            print(f"[ERROR] Failed to load model '{model_path}': {e}")
            self.available = False

    def predict(self, prompt):
        # Fall back to a default weight when the model could not be loaded.
        if not self.available:
            print("[WARN] SmolLM model unavailable, returning default weight 0.5")
            return "0.5"
        try:
            print(f"[INFO] Generating response for prompt: {prompt[:100]}...", flush=True)
            # Truncate overly long prompts to 512 tokens to keep inference bounded.
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            # Use max_new_tokens (not max_length) so generation is not cut short
            # when the prompt alone already exceeds 150 tokens.
            outputs = self.model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)
            # Note: decoding outputs[0] yields the prompt plus the generated continuation.
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            print(f"[INFO] Generated response: {response[:100]}...", flush=True)
            return response
        except Exception as e:
            print(f"[ERROR] SmolLM inference failed: {e}")
            return "0.5"
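

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal smoke test, assuming this file is run directly. The prompt text and
# the "weight between 0 and 1" framing are hypothetical examples chosen to match
# the default "0.5" fallback above; they are not confirmed by the source.
if __name__ == "__main__":
    llm = SmolLM()  # downloads/loads the checkpoint on first use; may take a while
    reply = llm.predict(
        "On a scale from 0 to 1, how relevant is 'cats' to 'dogs'? Answer with a single number."
    )
    print(f"[INFO] Raw model reply: {reply}")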