Mehrdad-S committed on
Commit
e3adec8
·
verified ·
1 Parent(s): eca1e92

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +22 -13
evaluate.py CHANGED
@@ -4,33 +4,42 @@ from datasets import load_dataset
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
 
7
def evaluate_model(model_name, dataset):
    """Correlate embedding similarity of text pairs with their gold scores.

    Each item of ``dataset`` is read through the keys ``"instruction"``,
    ``"output"`` and ``"similarity_score"``. Both texts are encoded with the
    model named by ``model_name``; the hidden state at position 0 of
    ``last_hidden_state`` is used as the text embedding. The cosine
    similarity of every pair is then correlated (``np.corrcoef``) with the
    dataset's similarity scores.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.
        dataset: Sized, indexable, iterable collection of mapping-like items.

    Returns:
        The correlation coefficient as a ``float``, or ``None`` when the
        dataset is empty or unlabeled, the model cannot be loaded, the
        evaluation raises, or the correlation is undefined.
    """
    # Reject unusable datasets before paying for a model download.
    try:
        if len(dataset) == 0:
            print("Evaluation failed: dataset is empty")
            return None
        if "similarity_score" not in dataset[0]:
            print("Evaluation failed: dataset has no 'similarity_score' field")
            return None
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        model.eval()
        model.to(device)
    except Exception as e:
        # Report the cause instead of swallowing it with a bare ``except``.
        print(f"Model loading failed: {e}")
        return None

    embeddings1, embeddings2 = [], []

    try:
        for item in dataset:
            # Inputs must live on the same device as the model weights.
            inputs1 = tokenizer(
                item["instruction"],
                return_tensors="pt",
                truncation=True,
                padding=True,
            ).to(device)
            inputs2 = tokenizer(
                item["output"],
                return_tensors="pt",
                truncation=True,
                padding=True,
            ).to(device)

            with torch.no_grad():
                embed1 = model(**inputs1).last_hidden_state[:, 0, :]
                embed2 = model(**inputs2).last_hidden_state[:, 0, :]

            # ``.numpy()`` only works on CPU tensors, so copy back first.
            embeddings1.append(embed1.squeeze().cpu().numpy())
            embeddings2.append(embed2.squeeze().cpu().numpy())

        sims = [
            cosine_similarity([e1], [e2])[0][0]
            for e1, e2 in zip(embeddings1, embeddings2)
        ]
        labels = [item["similarity_score"] for item in dataset]

        corr = float(np.corrcoef(sims, labels)[0, 1])
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None

    # corrcoef yields NaN for a single pair or constant inputs; that is not
    # a usable score, so report it as a failure rather than returning NaN.
    if not np.isfinite(corr):
        print("Evaluation failed: correlation is undefined")
        return None
    return corr
 
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
 
7
def _first_token_embedding(text, tokenizer, model, device):
    """Return position 0 of the model's last hidden state for ``text`` as a (1, dim) array."""
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    hidden = model(**inputs).last_hidden_state[:, 0, :]
    return hidden.cpu().numpy().reshape(1, -1)


def evaluate_model(model_name, dataset):
    """Correlate embedding similarity of text pairs with their gold scores.

    Each item of ``dataset`` is read through the keys ``"instruction"``,
    ``"output"`` and ``"similarity_score"``. Both texts are embedded with the
    model named by ``model_name``, the cosine similarity of each pair is
    computed, and the similarities are correlated (``np.corrcoef``) with the
    dataset's similarity scores.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.
        dataset: Sized, indexable, iterable collection of mapping-like items.

    Returns:
        The correlation coefficient as a ``float``, or ``None`` when the
        dataset is empty or has no ``"similarity_score"`` field, the model
        cannot be loaded, the evaluation raises, or the correlation is
        undefined (fewer than two pairs, or constant values).
    """
    # Check the cheap preconditions first: there is no point loading a model
    # and embedding every pair only to find there are no labels to compare.
    try:
        has_labels = len(dataset) > 0 and "similarity_score" in dataset[0]
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None
    if not has_labels:
        print("No similarity scores in dataset.")
        return None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        model.eval()
        model.to(device)
    except Exception as e:
        print(f"Model loading failed: {e}")
        return None

    sims, labels = [], []
    try:
        with torch.no_grad():
            # Single pass: collect the similarity and its label together so
            # the two lists cannot drift out of alignment.
            for item in dataset:
                embed1 = _first_token_embedding(
                    item["instruction"], tokenizer, model, device
                )
                embed2 = _first_token_embedding(
                    item["output"], tokenizer, model, device
                )
                sims.append(cosine_similarity(embed1, embed2)[0][0])
                labels.append(item["similarity_score"])

        corr = float(np.corrcoef(sims, labels)[0, 1])
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return None

    # corrcoef yields NaN for a single pair or constant inputs; NaN is not a
    # usable score and would slip past callers that only test for None.
    if not np.isfinite(corr):
        print("Evaluation failed: correlation is undefined.")
        return None
    return corr