Spaces:
Sleeping
Sleeping
Commit
·
0260491
1
Parent(s):
530452f
fix bug of colored text
Browse files
application.py
CHANGED
@@ -125,7 +125,7 @@ FOR GOVERNOR<br>
|
|
125 |
- Each highlighted pair (marked with a number) shows the key differences
|
126 |
between the input text and the source.
|
127 |
"""
|
128 |
-
table = """
|
129 |
<h5>Comparison between input news and source news:</h5>
|
130 |
<table border="1" style="width:100%; text-align:left;">
|
131 |
<col style="width: 170px;">
|
@@ -144,7 +144,7 @@ between the input text and the source.
|
|
144 |
<tr>
|
145 |
<td style="border-bottom: 1px solid transparent";>TBD</td>
|
146 |
<td style="border-bottom: 1px solid transparent";>TBD</td>
|
147 |
-
<td rowspan="2">
|
148 |
<td rowspan="2">TBD</td>
|
149 |
</tr>
|
150 |
<tr>
|
@@ -164,7 +164,7 @@ between the input text and the source.
|
|
164 |
with gr.Column(scale=2):
|
165 |
with gr.Accordion("NEWS ANALYSIS"):
|
166 |
verification_button = gr.Button("Verify news")
|
167 |
-
with gr.Tab("
|
168 |
gr.HTML(ordinary_user_explanation)
|
169 |
ordinary_user_result = gr.HTML(table)
|
170 |
with gr.Tab("Fact Checker"):
|
|
|
125 |
- Each highlighted pair (marked with a number) shows the key differences
|
126 |
between the input text and the source.
|
127 |
"""
|
128 |
+
table = f"""
|
129 |
<h5>Comparison between input news and source news:</h5>
|
130 |
<table border="1" style="width:100%; text-align:left;">
|
131 |
<col style="width: 170px;">
|
|
|
144 |
<tr>
|
145 |
<td style="border-bottom: 1px solid transparent";>TBD</td>
|
146 |
<td style="border-bottom: 1px solid transparent";>TBD</td>
|
147 |
+
<td rowspan="2"> <img src="https://huggingface.co/spaces/pmkhanh7890/news_verification/resolve/main/examples/example_image_input.jpg" alt="A picture of a cat."></td>
|
148 |
<td rowspan="2">TBD</td>
|
149 |
</tr>
|
150 |
<tr>
|
|
|
164 |
with gr.Column(scale=2):
|
165 |
with gr.Accordion("NEWS ANALYSIS"):
|
166 |
verification_button = gr.Button("Verify news")
|
167 |
+
with gr.Tab("Ordinary User"):
|
168 |
gr.HTML(ordinary_user_explanation)
|
169 |
ordinary_user_result = gr.HTML(table)
|
170 |
with gr.Tab("Fact Checker"):
|
src/application/content_detection.py
CHANGED
@@ -409,7 +409,6 @@ class NewsVerification:
|
|
409 |
first_url_row,
|
410 |
last_url_row,
|
411 |
span_row,
|
412 |
-
max_length,
|
413 |
)
|
414 |
rows.append(formatted_row)
|
415 |
|
@@ -443,7 +442,6 @@ class NewsVerification:
|
|
443 |
first_url_row=True,
|
444 |
last_url_row=True,
|
445 |
span_row=1,
|
446 |
-
max_length=30,
|
447 |
):
|
448 |
entity_count = 0
|
449 |
if row[0]["input"] is None:
|
@@ -850,12 +848,12 @@ class NewsVerification:
|
|
850 |
|
851 |
Examples:
|
852 |
starts = [0, 5, 10]
|
853 |
-
ends = [3, 7, 12]
|
854 |
-
ignore_indices = [1, 2,
|
855 |
|
856 |
# Output:
|
857 |
-
starts = [0, 3, 5, 10
|
858 |
-
ends = [
|
859 |
|
860 |
"""
|
861 |
|
@@ -880,12 +878,46 @@ class NewsVerification:
|
|
880 |
|
881 |
start_end = list(range(start, end + 1, 1))
|
882 |
start_end = list(set(start_end) - set(ignore_indices))
|
883 |
-
new_start, new_end = self.extract_sequences(start_end)
|
|
|
|
|
|
|
|
|
|
|
884 |
filtered_starts.extend(new_start)
|
885 |
filtered_ends.extend(new_end)
|
886 |
|
887 |
return filtered_starts, filtered_ends
|
888 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
889 |
def extract_sequences(self, numbers):
|
890 |
if len(numbers) == 1:
|
891 |
return [numbers[0]], [numbers[0]]
|
|
|
409 |
first_url_row,
|
410 |
last_url_row,
|
411 |
span_row,
|
|
|
412 |
)
|
413 |
rows.append(formatted_row)
|
414 |
|
|
|
442 |
first_url_row=True,
|
443 |
last_url_row=True,
|
444 |
span_row=1,
|
|
|
445 |
):
|
446 |
entity_count = 0
|
447 |
if row[0]["input"] is None:
|
|
|
848 |
|
849 |
Examples:
|
850 |
starts = [0, 5, 10]
|
851 |
+
ends = [3, 7, 12] # words at the end will not be colored.
|
852 |
+
ignore_indices = [1, 2, 12, 17]
|
853 |
|
854 |
# Output:
|
855 |
+
starts = [0, 3, 5, 10]
|
856 |
+
ends = [1, 4, 7, 12]
|
857 |
|
858 |
"""
|
859 |
|
|
|
878 |
|
879 |
start_end = list(range(start, end + 1, 1))
|
880 |
start_end = list(set(start_end) - set(ignore_indices))
|
881 |
+
#new_start, new_end = self.extract_sequences(start_end)
|
882 |
+
new_start, new_end = self.extract_new_startend(
|
883 |
+
start,
|
884 |
+
end,
|
885 |
+
ignore_indices
|
886 |
+
)
|
887 |
filtered_starts.extend(new_start)
|
888 |
filtered_ends.extend(new_end)
|
889 |
|
890 |
return filtered_starts, filtered_ends
|
891 |
|
892 |
+
def extract_new_startend(self, start, end, ignore_indices):
|
893 |
+
# sort a set of ignore_indices
|
894 |
+
indexes = list(set(ignore_indices))
|
895 |
+
indexes.sort()
|
896 |
+
|
897 |
+
new_starts = []
|
898 |
+
new_ends = []
|
899 |
+
new_start = start
|
900 |
+
if indexes is None or len(indexes) < 1:
|
901 |
+
new_starts.append(start)
|
902 |
+
new_ends.append(end)
|
903 |
+
return new_starts, new_ends
|
904 |
+
|
905 |
+
for index in indexes:
|
906 |
+
if index < start:
|
907 |
+
continue
|
908 |
+
elif index >= end:
|
909 |
+
continue
|
910 |
+
|
911 |
+
new_starts.append(new_start)
|
912 |
+
new_ends.append(index)
|
913 |
+
|
914 |
+
new_start = index + 1
|
915 |
+
|
916 |
+
new_starts.append(new_start)
|
917 |
+
new_ends.append(end)
|
918 |
+
|
919 |
+
return new_starts, new_ends
|
920 |
+
|
921 |
def extract_sequences(self, numbers):
|
922 |
if len(numbers) == 1:
|
923 |
return [numbers[0]], [numbers[0]]
|
src/application/text/entity.py
CHANGED
@@ -231,9 +231,9 @@ def apply_highlight(text, entities_with_colors, key="input", count=0):
|
|
231 |
|
232 |
# Append highlighted text with index label
|
233 |
highlighted_text += (
|
234 |
-
f'
|
235 |
f'border-radius:3px;font-size:14px;display:inline-block;">'
|
236 |
-
f"{index_label}{temp_text[start:end]}</span
|
237 |
)
|
238 |
prev_end = end
|
239 |
highlighted_text += temp_text[prev_end:]
|
|
|
231 |
|
232 |
# Append highlighted text with index label
|
233 |
highlighted_text += (
|
234 |
+
f'<span_style="background-color:{entity_color};color:black;'
|
235 |
f'border-radius:3px;font-size:14px;display:inline-block;">'
|
236 |
+
f"{index_label}{temp_text[start:end]}</span>"
|
237 |
)
|
238 |
prev_end = end
|
239 |
highlighted_text += temp_text[prev_end:]
|
src/application/text/helper.py
CHANGED
@@ -161,10 +161,10 @@ def extract_equal_text(text1, text2):
|
|
161 |
if tag == "equal":
|
162 |
equal_idx_1.append({"start": i1, "end": i2})
|
163 |
equal_idx_2.append({"start": j1, "end": j2})
|
164 |
-
|
165 |
-
|
166 |
-
# print(f'{tag:7} a[{i1:2}:{i2:2}] --> b[{j1:2}:{
|
167 |
-
#
|
168 |
return equal_idx_1, equal_idx_2
|
169 |
|
170 |
|
|
|
161 |
if tag == "equal":
|
162 |
equal_idx_1.append({"start": i1, "end": i2})
|
163 |
equal_idx_2.append({"start": j1, "end": j2})
|
164 |
+
subtext_1 = " ".join(text1[i1:i2])
|
165 |
+
subtext_2 = " ".join(text2[j1:j2])
|
166 |
+
# print(f'{tag:7} a[{i1:2}:{i2:2}] --> b[{j1:2}:{j2:2}] '
|
167 |
+
# f'{subtext_1!r:>55} --> {subtext_2!r}')
|
168 |
return equal_idx_1, equal_idx_2
|
169 |
|
170 |
|
src/application/text/search_detection.py
CHANGED
@@ -286,7 +286,7 @@ def check_paraphrase(input_text, page_text, url):
|
|
286 |
"paraphrase": is_paraphrased,
|
287 |
"url": url,
|
288 |
}
|
289 |
-
print(f
|
290 |
|
291 |
return alignment
|
292 |
|
|
|
286 |
"paraphrase": is_paraphrased,
|
287 |
"url": url,
|
288 |
}
|
289 |
+
print(f'Result: [{alignment["similarity"]}] {alignment["source"]}')
|
290 |
|
291 |
return alignment
|
292 |
|