File size: 1,630 Bytes
e6ce7cd
 
c0b5eff
e6ce7cd
 
c0b5eff
e3adec8
 
 
c0b5eff
e6ce7cd
 
 
e3adec8
 
 
e6ce7cd
 
 
e3adec8
e6ce7cd
 
e3adec8
 
c0b5eff
e6ce7cd
e3adec8
 
c0b5eff
e3adec8
 
e6ce7cd
 
 
e3adec8
 
 
 
 
 
 
 
5ba152f
 
e3adec8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import AutoTokenizer, AutoModel
import torch
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def _embed_first_token(text, tokenizer, model, device):
    """Return the hidden state at sequence position 0 for *text* as a 1-D array.

    The text is tokenized with truncation enabled, run through *model* with
    gradients disabled, and position 0 of ``last_hidden_state`` is copied to
    the CPU and flattened.
    """
    # NOTE(review): position 0 is presumably the [CLS]-style token for the
    # encoder models this is used with -- confirm for each model family.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
    return hidden[:, 0, :].cpu().numpy().flatten()


def evaluate_model(model_name, dataset):
    """Score a model by correlating embedding similarity with reference labels.

    For every row, the ``"instruction"`` and ``"output"`` texts are embedded
    separately, their cosine similarity is computed, and the Pearson
    correlation (``np.corrcoef``) between those similarities and the rows'
    ``"similarity_score"`` values is returned.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained`` and
            ``AutoModel.from_pretrained``.
        dataset: Sized, indexable, iterable collection of mapping-like rows
            with ``"instruction"``, ``"output"`` and ``"similarity_score"``
            keys.

    Returns:
        The correlation as a ``float``, or ``None`` when the dataset is empty
        or unlabeled, the model cannot be loaded, evaluation raises, or the
        correlation is undefined (fewer than two rows, or constant inputs).
        Failures are reported with ``print``; this function does not raise
        for ordinary errors.
    """
    # Validate the dataset first: this is cheap, and it avoids loading a model
    # and embedding every row only to discover there are no labels.
    try:
        if len(dataset) == 0:
            print("Evaluation failed: dataset is empty.")
            return None
        if "similarity_score" not in dataset[0]:
            print("No similarity scores in dataset.")
            return None
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Broad catches are deliberate here: this is a best-effort boundary and
    # the model libraries can raise many unrelated exception types.
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        model.eval()
        model.to(device)
    except Exception as e:
        print(f"Model loading failed: {e}")
        return None

    sims, labels = [], []

    try:
        for item in dataset:
            embed1 = _embed_first_token(item["instruction"], tokenizer, model, device)
            embed2 = _embed_first_token(item["output"], tokenizer, model, device)

            # Keep only the scalar similarity; the embeddings are not needed
            # again, so there is no reason to hold them all in memory.
            sims.append(cosine_similarity([embed1], [embed2])[0][0])
            labels.append(item["similarity_score"])

        if len(sims) < 2:
            print("Evaluation failed: need at least two samples to compute a correlation.")
            return None

        # Constant similarities or labels make the correlation 0/0; silence
        # NumPy's warnings and report the condition explicitly instead.
        with np.errstate(divide="ignore", invalid="ignore"):
            corr = np.corrcoef(sims, labels)[0, 1]

        if not np.isfinite(corr):
            print("Evaluation failed: correlation is undefined for constant similarities or labels.")
            return None
        return float(corr)

    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None