akki2825 committed on
Commit
d41eee2
·
verified ·
1 Parent(s): b19598b

display where the misalignment occurs

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -78,7 +78,7 @@ def calculate_sentence_metrics(reference, hypothesis):
78
  def identify_misaligned_sentences(reference_text, hypothesis_text):
79
  """
80
  Identify sentences that don't match between reference and hypothesis.
81
- Returns a dictionary with misaligned sentence pairs and their indices.
82
  """
83
  reference_sentences = split_into_sentences(reference_text)
84
  hypothesis_sentences = split_into_sentences(hypothesis_text)
@@ -86,10 +86,24 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
86
  misaligned = []
87
  for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
88
  if ref != hyp:
 
 
 
 
 
 
 
 
 
 
 
89
  misaligned.append({
90
  "index": i+1,
91
  "reference": ref,
92
- "hypothesis": hyp
 
 
 
93
  })
94
 
95
  return misaligned
@@ -115,8 +129,9 @@ def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_c
115
  md += "\n### Misaligned Sentences\n\n"
116
  for misaligned in misaligned_sentences:
117
  md += f"#### Sentence {misaligned['index']}\n"
118
- md += f"* Reference: {misaligned['reference']}\n"
119
- md += f"* Hypothesis: {misaligned['hypothesis']}\n\n"
 
120
 
121
  return md
122
 
@@ -215,8 +230,9 @@ def main():
215
  misaligned_md = "### Misaligned Sentences\n\n"
216
  for misaligned in result["Misaligned Sentences"]:
217
  misaligned_md += f"#### Sentence {misaligned['index']}\n"
218
- misaligned_md += f"* Reference: {misaligned['reference']}\n"
219
- misaligned_md += f"* Hypothesis: {misaligned['hypothesis']}\n\n"
 
220
 
221
  return metrics, metrics_md, misaligned_md
222
 
 
def identify_misaligned_sentences(reference_text, hypothesis_text):
    """
    Identify sentences that don't match between reference and hypothesis.

    Both texts are split with ``split_into_sentences`` and compared pairwise
    by position. Because the pairs come from ``zip``, trailing sentences that
    have no counterpart in the shorter text are not reported.

    Returns a list with one dictionary per mismatching pair, containing:
      * "index": 1-based position of the sentence pair.
      * "reference" / "hypothesis": the raw sentences.
      * "misalignment_start": 0-based character offset of the first
        difference. When one sentence is a prefix of the other, this is the
        length of the shorter sentence (the point where it ends).
      * "context_ref" / "context_hyp": the sentences with everything from
        the misalignment onwards wrapped in Markdown bold markers.
    """
    reference_sentences = split_into_sentences(reference_text)
    hypothesis_sentences = split_into_sentences(hypothesis_text)

    misaligned = []
    for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
        if ref == hyp:
            continue

        # Find the first position where the sentences diverge. If no
        # character differs within the common length, one sentence is a
        # strict prefix of the other, so the divergence is where the shorter
        # one ends -- not position 0.
        min_len = min(len(ref), len(hyp))
        misalignment_start = next(
            (j for j in range(min_len) if ref[j] != hyp[j]),
            min_len,
        )

        misaligned.append({
            "index": i + 1,
            "reference": ref,
            "hypothesis": hyp,
            "misalignment_start": misalignment_start,
            "context_ref": _bold_from(ref, misalignment_start),
            "context_hyp": _bold_from(hyp, misalignment_start),
        })

    return misaligned


def _bold_from(sentence, start):
    """Return sentence with the part from start onwards in Markdown bold."""
    tail = sentence[start:]
    # An empty tail would otherwise render as a literal "****".
    if not tail:
        return sentence[:start]
    return sentence[:start] + f"**{tail}**"
 
129
  md += "\n### Misaligned Sentences\n\n"
130
  for misaligned in misaligned_sentences:
131
  md += f"#### Sentence {misaligned['index']}\n"
132
+ md += f"* Reference: {misaligned['context_ref']}\n"
133
+ md += f"* Hypothesis: {misaligned['context_hyp']}\n"
134
+ md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
135
 
136
  return md
137
 
 
230
  misaligned_md = "### Misaligned Sentences\n\n"
231
  for misaligned in result["Misaligned Sentences"]:
232
  misaligned_md += f"#### Sentence {misaligned['index']}\n"
233
+ misaligned_md += f"* Reference: {misaligned['context_ref']}\n"
234
+ misaligned_md += f"* Hypothesis: {misaligned['context_hyp']}\n"
235
+ misaligned_md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
236
 
237
  return metrics, metrics_md, misaligned_md
238