Mehrdad-S committed on
Commit
a0c8afe
·
verified ·
1 Parent(s): e3adec8

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +11 -39
evaluate.py CHANGED
@@ -1,45 +1,17 @@
1
- from transformers import AutoTokenizer, AutoModel
2
- import torch
3
- from datasets import load_dataset
4
- from sklearn.metrics.pairwise import cosine_similarity
5
- import numpy as np
6
-
7
- def evaluate_model(model_name, dataset):
8
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
-
10
- try:
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModel.from_pretrained(model_name)
13
- model.eval()
14
- model.to(device)
15
- except Exception as e:
16
- print(f"Model loading failed: {e}")
17
- return None
18
-
19
- embeddings1, embeddings2 = [], []
20
 
 
21
  try:
22
- for item in dataset:
23
- inputs1 = tokenizer(item["instruction"], return_tensors="pt", truncation=True, padding=True).to(device)
24
- inputs2 = tokenizer(item["output"], return_tensors="pt", truncation=True, padding=True).to(device)
25
-
26
- with torch.no_grad():
27
- embed1 = model(**inputs1).last_hidden_state[:, 0, :].cpu().numpy()
28
- embed2 = model(**inputs2).last_hidden_state[:, 0, :].cpu().numpy()
29
-
30
- embeddings1.append(embed1.flatten())
31
- embeddings2.append(embed2.flatten())
32
-
33
- sims = [cosine_similarity([e1], [e2])[0][0] for e1, e2 in zip(embeddings1, embeddings2)]
34
 
35
- if "similarity_score" in dataset[0]:
36
- labels = [item["similarity_score"] for item in dataset]
37
- corr = np.corrcoef(sims, labels)[0, 1]
38
- return float(corr)
39
- else:
40
- print("No similarity scores in dataset.")
41
- return None
42
 
 
43
  except Exception as e:
44
  print(f"Evaluation failed: {e}")
45
- return None
 
1
+ from sentence_transformers import SentenceTransformer, util
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
def evaluate_model(model_name, dataset):
    """Return the mean instruction/output cosine similarity over *dataset*.

    A ``SentenceTransformer`` is loaded from ``model_name``; for every row the
    ``"instruction"`` and ``"output"`` fields are encoded and compared with
    cosine similarity, and the scores are averaged.

    Args:
        model_name: Identifier handed straight to ``SentenceTransformer``.
        dataset: Iterable of rows indexable by ``"instruction"`` and
            ``"output"``. Presumably both values are strings; verify
            against the caller.

    Returns:
        The average similarity as a float, or ``None`` when the dataset is
        empty or any step fails (a message is printed in both cases).
    """
    # A sized, empty dataset cannot produce a score, so skip the expensive
    # model load instead of failing later with a division by zero.
    if hasattr(dataset, "__len__") and len(dataset) == 0:
        print("Evaluation failed: dataset is empty")
        return None

    try:
        model = SentenceTransformer(model_name)

        scores = []
        for row in dataset:
            emb1 = model.encode(row["instruction"], convert_to_tensor=True)
            emb2 = model.encode(row["output"], convert_to_tensor=True)
            # cos_sim returns a similarity matrix; with one vector per side
            # its first row holds the single score for this pair.
            scores.append(float(util.cos_sim(emb1, emb2)[0]))

        # Unsized iterables (e.g. generators) only reveal emptiness here.
        if not scores:
            print("Evaluation failed: dataset is empty")
            return None

        return sum(scores) / len(scores)
    except Exception as e:
        # Best-effort boundary: callers rely on None rather than an exception.
        print(f"Evaluation failed: {e}")
        return None