from typing import Any, Iterable, Mapping, Optional

from sentence_transformers import SentenceTransformer, util


def evaluate_model(
    model_name: str, dataset: Iterable[Mapping[str, Any]]
) -> Optional[float]:
    """Return the mean cosine similarity between instruction and output embeddings.

    Each row's ``"instruction"`` and ``"output"`` values are embedded with the
    SentenceTransformer model named by *model_name*; the cosine similarity of
    the two embeddings is computed per row, and the scores are averaged.

    Args:
        model_name: Identifier passed to ``SentenceTransformer(...)``.
        dataset: Iterable of rows indexable by the keys ``"instruction"`` and
            ``"output"``. The values are handed to ``model.encode`` as-is
            (presumably text -- confirm against the caller).

    Returns:
        The average similarity score as a float, or ``None`` when the dataset
        yields no rows or any step of the evaluation raises. In both failure
        cases a message starting with ``"Evaluation failed:"`` is printed.
    """
    # Deliberate best-effort boundary: any failure (model loading, a row with
    # a missing key, encoding errors) is reported and mapped to None rather
    # than propagated to the caller.
    try:
        model = SentenceTransformer(model_name)
        scores = []
        for row in dataset:
            instruction_emb = model.encode(
                row["instruction"], convert_to_tensor=True
            )
            output_emb = model.encode(row["output"], convert_to_tensor=True)
            # For one embedding on each side the similarity result holds a
            # single element; .item() extracts it as a plain Python float.
            scores.append(util.cos_sim(instruction_emb, output_emb).item())

        # Checked after the loop (not via len(dataset)) so that generators
        # and other unsized iterables are supported. Without this guard the
        # mean below would raise ZeroDivisionError and be reported as a
        # confusing "division by zero" failure.
        if not scores:
            print("Evaluation failed: dataset is empty")
            return None

        return sum(scores) / len(scores)
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None