Spaces:
Sleeping
Sleeping
debug misaligned sentences
Browse files
app.py
CHANGED
@@ -86,6 +86,7 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
86 |
misaligned = []
|
87 |
for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
|
88 |
if ref != hyp:
|
|
|
89 |
# Find the first position where the sentences diverge
|
90 |
min_len = min(len(ref), len(hyp))
|
91 |
misalignment_start = 0
|
@@ -105,7 +106,7 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
105 |
"context_ref": context_ref,
|
106 |
"context_hyp": context_hyp
|
107 |
})
|
108 |
-
|
109 |
return misaligned
|
110 |
|
111 |
def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_cer, std_dev_wer, std_dev_cer, misaligned_sentences):
|
@@ -129,12 +130,16 @@ def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_c
|
|
129 |
md += "\n### Misaligned Sentences\n\n"
|
130 |
for misaligned in misaligned_sentences:
|
131 |
md += f"#### Sentence {misaligned['index']}\n"
|
132 |
-
md += f"* Reference: {misaligned['
|
133 |
-
md += f"* Hypothesis: {misaligned['
|
134 |
md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
|
|
|
|
|
|
|
135 |
|
136 |
return md
|
137 |
|
|
|
138 |
@spaces.GPU()
|
139 |
def process_files(reference_file, hypothesis_file):
|
140 |
try:
|
|
|
86 |
misaligned = []
|
87 |
for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
|
88 |
if ref != hyp:
|
89 |
+
print(f"Debug: Found misalignment in sentence {i+1}")
|
90 |
# Find the first position where the sentences diverge
|
91 |
min_len = min(len(ref), len(hyp))
|
92 |
misalignment_start = 0
|
|
|
106 |
"context_ref": context_ref,
|
107 |
"context_hyp": context_hyp
|
108 |
})
|
109 |
+
print(f"Debug: Total misaligned sentences found: {len(misaligned)}")
|
110 |
return misaligned
|
111 |
|
112 |
def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_cer, std_dev_wer, std_dev_cer, misaligned_sentences):
|
|
|
130 |
md += "\n### Misaligned Sentences\n\n"
|
131 |
for misaligned in misaligned_sentences:
|
132 |
md += f"#### Sentence {misaligned['index']}\n"
|
133 |
+
md += f"* Reference: {misaligned['reference']}\n"
|
134 |
+
md += f"* Hypothesis: {misaligned['hypothesis']}\n"
|
135 |
md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
|
136 |
+
else:
|
137 |
+
md += "\n### Misaligned Sentences\n\n"
|
138 |
+
md += "* No misaligned sentences found."
|
139 |
|
140 |
return md
|
141 |
|
142 |
+
|
143 |
@spaces.GPU()
|
144 |
def process_files(reference_file, hypothesis_file):
|
145 |
try:
|