Spaces:
Running
on
Zero
Running
on
Zero
Better handling of spaces
Browse files
app.py
CHANGED
@@ -226,7 +226,7 @@ class BytelatentProcessor:
|
|
226 |
# Boundary check, though loop structure should prevent out-of-bounds
|
227 |
if current_byte_abs_idx < len(all_token_ids):
|
228 |
label = token_to_patch_label[current_byte_abs_idx] if current_byte_abs_idx < len(token_to_patch_label) else "Error: Label Missing"
|
229 |
-
display_text = f"{processed_char_text}-{j+1}"
|
230 |
highlighted_data.append((display_text, label))
|
231 |
else: # Should ideally not be reached
|
232 |
logging.error(f"Critical: Token index {current_byte_abs_idx} out of bounds for labeling.")
|
@@ -248,7 +248,7 @@ class BytelatentProcessor:
|
|
248 |
except Exception:
|
249 |
pass # Stick with the err_byte display_text
|
250 |
|
251 |
-
highlighted_data.append((display_text
|
252 |
logging.warning(
|
253 |
f"Token ID {problem_byte_id} at index {current_byte_abs_idx} "
|
254 |
f"could not be part of a validly decoded character using iterative decode. Fallback: '{display_text}'."
|
@@ -321,7 +321,7 @@ class BytelatentProcessor:
|
|
321 |
|
322 |
# Create highlighted text data
|
323 |
_highlighted_data, patch_count = self._create_highlight_data(patch_lengths, tokens)
|
324 |
-
ind_highlighted_data = [(text.replace("-1", ""), label) for text, label in _highlighted_data]
|
325 |
grouped_data = defaultdict(str)
|
326 |
for text, label in ind_highlighted_data:
|
327 |
grouped_data[label] += text
|
|
|
226 |
# Boundary check, though loop structure should prevent out-of-bounds
|
227 |
if current_byte_abs_idx < len(all_token_ids):
|
228 |
label = token_to_patch_label[current_byte_abs_idx] if current_byte_abs_idx < len(token_to_patch_label) else "Error: Label Missing"
|
229 |
+
display_text = f"{processed_char_text}-{j+1}"
|
230 |
highlighted_data.append((display_text, label))
|
231 |
else: # Should ideally not be reached
|
232 |
logging.error(f"Critical: Token index {current_byte_abs_idx} out of bounds for labeling.")
|
|
|
248 |
except Exception:
|
249 |
pass # Stick with the err_byte display_text
|
250 |
|
251 |
+
highlighted_data.append((display_text, label))
|
252 |
logging.warning(
|
253 |
f"Token ID {problem_byte_id} at index {current_byte_abs_idx} "
|
254 |
f"could not be part of a validly decoded character using iterative decode. Fallback: '{display_text}'."
|
|
|
321 |
|
322 |
# Create highlighted text data
|
323 |
_highlighted_data, patch_count = self._create_highlight_data(patch_lengths, tokens)
|
324 |
+
ind_highlighted_data = [(text.replace(" -1", "_-1").replace("-1", ""), label) for text, label in _highlighted_data]
|
325 |
grouped_data = defaultdict(str)
|
326 |
for text, label in ind_highlighted_data:
|
327 |
grouped_data[label] += text
|