# Spaces:
# Runtime error
# Runtime error
from transformers import AutoTokenizer, AutoModel | |
import torch | |
from datasets import load_dataset | |
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
def evaluate_model(model_name, dataset):
    """Correlate a model's embedding similarity with the dataset's labels.

    For every item, the ``"instruction"`` and ``"output"`` texts are each
    tokenized and run through the model; the last-layer hidden state at
    sequence position 0 is taken as the text's embedding (for BERT-style
    models this is typically the [CLS] token -- confirm for other
    architectures). The cosine similarity of each pair is then correlated
    (``np.corrcoef``, i.e. Pearson) with the item's ``"similarity_score"``.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.
        dataset: Indexable, iterable collection of mappings with the keys
            ``"instruction"``, ``"output"`` and ``"similarity_score"``.

    Returns:
        The correlation coefficient as a ``float``, or ``None`` when the
        dataset cannot be inspected, has no ``"similarity_score"`` key in
        its first item, the model fails to load, or evaluation raises.
        The value can be ``nan`` when ``np.corrcoef`` is undefined for the
        data (e.g. a single item or a constant series).
    """
    # Fail fast: inspect the dataset before loading the model, so an empty or
    # unlabeled dataset does not cost a model load plus a full embedding pass.
    # The broad catch is kept because the dataset type is not known here and
    # callers rely on getting None back rather than an exception.
    try:
        has_labels = "similarity_score" in dataset[0]
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None
    if not has_labels:
        print("No similarity scores in dataset.")
        return None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        model.eval()
        model.to(device)
    except Exception as e:
        print(f"Model loading failed: {e}")
        return None

    def embed(text):
        # Position-0 hidden state of the last layer, flattened to a 1-D vector.
        inputs = tokenizer(
            text, return_tensors="pt", truncation=True, padding=True
        ).to(device)
        hidden = model(**inputs).last_hidden_state
        return hidden[:, 0, :].cpu().numpy().flatten()

    try:
        sims, labels = [], []
        # Inference only: no autograd graph is needed for any forward pass.
        with torch.no_grad():
            for item in dataset:
                vec_a = embed(item["instruction"])
                vec_b = embed(item["output"])
                # Reduce each pair to a scalar immediately instead of keeping
                # every embedding in memory until the end.
                sims.append(cosine_similarity([vec_a], [vec_b])[0][0])
                labels.append(item["similarity_score"])
        corr = np.corrcoef(sims, labels)[0, 1]
        return float(corr)
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None