Spaces:
Sleeping
Sleeping
display where the misalignment occurs
Browse files
app.py
CHANGED
@@ -78,7 +78,7 @@ def calculate_sentence_metrics(reference, hypothesis):
|
|
78 |
def identify_misaligned_sentences(reference_text, hypothesis_text):
|
79 |
"""
|
80 |
Identify sentences that don't match between reference and hypothesis.
|
81 |
-
Returns a dictionary with misaligned sentence pairs and
|
82 |
"""
|
83 |
reference_sentences = split_into_sentences(reference_text)
|
84 |
hypothesis_sentences = split_into_sentences(hypothesis_text)
|
@@ -86,10 +86,24 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
86 |
misaligned = []
|
87 |
for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
|
88 |
if ref != hyp:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
misaligned.append({
|
90 |
"index": i+1,
|
91 |
"reference": ref,
|
92 |
-
"hypothesis": hyp
|
|
|
|
|
|
|
93 |
})
|
94 |
|
95 |
return misaligned
|
@@ -115,8 +129,9 @@ def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_c
|
|
115 |
md += "\n### Misaligned Sentences\n\n"
|
116 |
for misaligned in misaligned_sentences:
|
117 |
md += f"#### Sentence {misaligned['index']}\n"
|
118 |
-
md += f"* Reference: {misaligned['
|
119 |
-
md += f"* Hypothesis: {misaligned['
|
|
|
120 |
|
121 |
return md
|
122 |
|
@@ -215,8 +230,9 @@ def main():
|
|
215 |
misaligned_md = "### Misaligned Sentences\n\n"
|
216 |
for misaligned in result["Misaligned Sentences"]:
|
217 |
misaligned_md += f"#### Sentence {misaligned['index']}\n"
|
218 |
-
misaligned_md += f"* Reference: {misaligned['
|
219 |
-
misaligned_md += f"* Hypothesis: {misaligned['
|
|
|
220 |
|
221 |
return metrics, metrics_md, misaligned_md
|
222 |
|
|
|
78 |
def identify_misaligned_sentences(reference_text, hypothesis_text):
    """
    Identify sentences that don't match between reference and hypothesis.

    Both texts are split with ``split_into_sentences`` and the resulting
    sentences are compared pairwise by position. Pairing stops at the end of
    the shorter sentence list, so unpaired trailing sentences are not
    reported.

    Returns a list with one dictionary per mismatching pair, containing:

    * ``index``: 1-based position of the sentence pair.
    * ``reference`` / ``hypothesis``: the raw sentences.
    * ``misalignment_start``: 0-based character offset of the first
      difference. When one sentence is a strict prefix of the other, this is
      the length of the shorter sentence.
    * ``context_ref`` / ``context_hyp``: the sentence with its divergent
      tail wrapped in Markdown bold markers (``**``).
    """
    reference_sentences = split_into_sentences(reference_text)
    hypothesis_sentences = split_into_sentences(hypothesis_text)

    misaligned = []
    sentence_pairs = zip(reference_sentences, hypothesis_sentences)
    for index, (ref, hyp) in enumerate(sentence_pairs, start=1):
        if ref == hyp:
            continue

        misalignment_start = _first_divergence(ref, hyp)
        misaligned.append({
            "index": index,
            "reference": ref,
            "hypothesis": hyp,
            "misalignment_start": misalignment_start,
            "context_ref": _bold_from(ref, misalignment_start),
            "context_hyp": _bold_from(hyp, misalignment_start),
        })

    return misaligned


def _first_divergence(ref, hyp):
    """Return the offset of the first differing character of two strings."""
    # If no character differs within the common length, one string is a
    # prefix of the other and they diverge where the shorter one ends
    # (not at offset 0).
    return next(
        (pos for pos, (r, h) in enumerate(zip(ref, hyp)) if r != h),
        min(len(ref), len(hyp)),
    )


def _bold_from(text, start):
    """Return *text* with everything from *start* onward in Markdown bold."""
    tail = text[start:]
    if not tail:
        # Nothing diverges in this string (it is the shorter, prefix side);
        # wrapping an empty tail would emit a stray "****".
        return text
    return f"{text[:start]}**{tail}**"
|
|
|
129 |
md += "\n### Misaligned Sentences\n\n"
|
130 |
for misaligned in misaligned_sentences:
|
131 |
md += f"#### Sentence {misaligned['index']}\n"
|
132 |
+
md += f"* Reference: {misaligned['context_ref']}\n"
|
133 |
+
md += f"* Hypothesis: {misaligned['context_hyp']}\n"
|
134 |
+
md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
|
135 |
|
136 |
return md
|
137 |
|
|
|
230 |
misaligned_md = "### Misaligned Sentences\n\n"
|
231 |
for misaligned in result["Misaligned Sentences"]:
|
232 |
misaligned_md += f"#### Sentence {misaligned['index']}\n"
|
233 |
+
misaligned_md += f"* Reference: {misaligned['context_ref']}\n"
|
234 |
+
misaligned_md += f"* Hypothesis: {misaligned['context_hyp']}\n"
|
235 |
+
misaligned_md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
|
236 |
|
237 |
return metrics, metrics_md, misaligned_md
|
238 |
|