Mehrdad-S committed
Commit e6ce7cd · verified · 1 Parent(s): aece1af

Update evaluate.py

Files changed (1): evaluate.py (+29 -11)
evaluate.py CHANGED
@@ -1,19 +1,37 @@
+from transformers import AutoTokenizer, AutoModel
+import torch
 from datasets import load_dataset
-from sentence_transformers import SentenceTransformer, util
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
 
 def evaluate_model(model_name):
     try:
-        model = SentenceTransformer(model_name)
-        dataset = load_dataset("arshiaafshani/persian-natural-fluently", split="train[:10]")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModel.from_pretrained(model_name)
+        model.eval()
+        model.to("cuda" if torch.cuda.is_available() else "cpu")
+    except:
+        return None
+
+    dataset = load_dataset("arshiaafshani/persian-natural-fluently", split="train[:10]")
+    embeddings1, embeddings2 = [], []
+    try:
+        for item in dataset:
+            inputs1 = tokenizer(item["instruction"], return_tensors="pt", truncation=True, padding=True)
+            inputs2 = tokenizer(item["output"], return_tensors="pt", truncation=True, padding=True)
 
-        scores = []
-        for row in dataset:
-            emb1 = model.encode(row["instruction"], convert_to_tensor=True)
-            emb2 = model.encode(row["output"], convert_to_tensor=True)
-            sim_score = float(util.cos_sim(emb1, emb2)[0])
-            scores.append(sim_score)
+            with torch.no_grad():
+                embed1 = model(**inputs1).last_hidden_state[:, 0, :]
+                embed2 = model(**inputs2).last_hidden_state[:, 0, :]
 
-        return sum(scores) / len(scores)
+            embeddings1.append(embed1.squeeze().numpy())
+            embeddings2.append(embed2.squeeze().numpy())
+
+        sims = [cosine_similarity([e1], [e2])[0][0] for e1, e2 in zip(embeddings1, embeddings2)]
+        labels = [item["similarity_score"] for item in dataset]
+
+        corr = np.corrcoef(sims, labels)[0, 1]
+        return float(corr)
     except Exception as e:
         print(f"Evaluation failed: {e}")
-        return None
+        return None
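Note on the updated code (a hedged observation, not part of the commit): model.to("cuda" if torch.cuda.is_available() else "cpu") moves the weights to the GPU when one is available, but the tokenized inputs are never moved off the CPU and .numpy() is called on tensors that would then live on the GPU, so the GPU path fails with a device mismatch / conversion error. A minimal sketch of a device-safe embedding step follows; embed_text and its device argument are hypothetical helpers introduced here only for illustration.

import torch

def embed_text(model, tokenizer, text, device):
    # Tokenize, then move the batch onto the same device as the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        # CLS-token embedding, matching the committed code's last_hidden_state[:, 0, :].
        embedding = model(**inputs).last_hidden_state[:, 0, :]
    # Bring the result back to the CPU before converting to NumPy.
    return embedding.squeeze().cpu().numpy()

Inside the loop this would replace the tokenize / forward / append steps, with device = "cuda" if torch.cuda.is_available() else "cpu" computed once alongside model.to(device).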