pmkhanh7890 committed on
Commit
0260491
·
1 Parent(s): 530452f

fix bug of colored text

Browse files
application.py CHANGED
@@ -125,7 +125,7 @@ FOR GOVERNOR<br>
125
  - Each highlighted pair (marked with a number) shows the key differences
126
  between the input text and the source.
127
  """
128
- table = """
129
  <h5>Comparison between input news and source news:</h5>
130
  <table border="1" style="width:100%; text-align:left;">
131
  <col style="width: 170px;">
@@ -144,7 +144,7 @@ between the input text and the source.
144
  <tr>
145
  <td style="border-bottom: 1px solid transparent";>TBD</td>
146
  <td style="border-bottom: 1px solid transparent";>TBD</td>
147
- <td rowspan="2">TBD</td>
148
  <td rowspan="2">TBD</td>
149
  </tr>
150
  <tr>
@@ -164,7 +164,7 @@ between the input text and the source.
164
  with gr.Column(scale=2):
165
  with gr.Accordion("NEWS ANALYSIS"):
166
  verification_button = gr.Button("Verify news")
167
- with gr.Tab("Orinary User"):
168
  gr.HTML(ordinary_user_explanation)
169
  ordinary_user_result = gr.HTML(table)
170
  with gr.Tab("Fact Checker"):
 
125
  - Each highlighted pair (marked with a number) shows the key differences
126
  between the input text and the source.
127
  """
128
+ table = f"""
129
  <h5>Comparison between input news and source news:</h5>
130
  <table border="1" style="width:100%; text-align:left;">
131
  <col style="width: 170px;">
 
144
  <tr>
145
  <td style="border-bottom: 1px solid transparent";>TBD</td>
146
  <td style="border-bottom: 1px solid transparent";>TBD</td>
147
+ <td rowspan="2"> <img src="https://huggingface.co/spaces/pmkhanh7890/news_verification/resolve/main/examples/example_image_input.jpg" alt="A picture of a cat."></td>
148
  <td rowspan="2">TBD</td>
149
  </tr>
150
  <tr>
 
164
  with gr.Column(scale=2):
165
  with gr.Accordion("NEWS ANALYSIS"):
166
  verification_button = gr.Button("Verify news")
167
+ with gr.Tab("Ordinary User"):
168
  gr.HTML(ordinary_user_explanation)
169
  ordinary_user_result = gr.HTML(table)
170
  with gr.Tab("Fact Checker"):
src/application/content_detection.py CHANGED
@@ -409,7 +409,6 @@ class NewsVerification:
409
  first_url_row,
410
  last_url_row,
411
  span_row,
412
- max_length,
413
  )
414
  rows.append(formatted_row)
415
 
@@ -443,7 +442,6 @@ class NewsVerification:
443
  first_url_row=True,
444
  last_url_row=True,
445
  span_row=1,
446
- max_length=30,
447
  ):
448
  entity_count = 0
449
  if row[0]["input"] is None:
@@ -850,12 +848,12 @@ class NewsVerification:
850
 
851
  Examples:
852
  starts = [0, 5, 10]
853
- ends = [3, 7, 12]
854
- ignore_indices = [1, 2, 11, 17]
855
 
856
  # Output:
857
- starts = [0, 3, 5, 10, 12]
858
- ends = [0, 3, 7, 10, 12]
859
 
860
  """
861
 
@@ -880,12 +878,46 @@ class NewsVerification:
880
 
881
  start_end = list(range(start, end + 1, 1))
882
  start_end = list(set(start_end) - set(ignore_indices))
883
- new_start, new_end = self.extract_sequences(start_end)
 
 
 
 
 
884
  filtered_starts.extend(new_start)
885
  filtered_ends.extend(new_end)
886
 
887
  return filtered_starts, filtered_ends
888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  def extract_sequences(self, numbers):
890
  if len(numbers) == 1:
891
  return [numbers[0]], [numbers[0]]
 
409
  first_url_row,
410
  last_url_row,
411
  span_row,
 
412
  )
413
  rows.append(formatted_row)
414
 
 
442
  first_url_row=True,
443
  last_url_row=True,
444
  span_row=1,
 
445
  ):
446
  entity_count = 0
447
  if row[0]["input"] is None:
 
848
 
849
  Examples:
850
  starts = [0, 5, 10]
851
+ ends = [3, 7, 12] # words at the end will not be colored.
852
+ ignore_indices = [1, 2, 12, 17]
853
 
854
  # Output:
855
+ starts = [0, 3, 5, 10]
856
+ ends = [1, 4, 7, 12]
857
 
858
  """
859
 
 
878
 
879
  start_end = list(range(start, end + 1, 1))
880
  start_end = list(set(start_end) - set(ignore_indices))
881
+ #new_start, new_end = self.extract_sequences(start_end)
882
+ new_start, new_end = self.extract_new_startend(
883
+ start,
884
+ end,
885
+ ignore_indices
886
+ )
887
  filtered_starts.extend(new_start)
888
  filtered_ends.extend(new_end)
889
 
890
  return filtered_starts, filtered_ends
891
 
892
+ def extract_new_startend(self, start, end, ignore_indices):
893
+ # sort a set of ignore_indices
894
+ indexes = list(set(ignore_indices))
895
+ indexes.sort()
896
+
897
+ new_starts = []
898
+ new_ends = []
899
+ new_start = start
900
+ if indexes is None or len(indexes) < 1:
901
+ new_starts.append(start)
902
+ new_ends.append(end)
903
+ return new_starts, new_ends
904
+
905
+ for index in indexes:
906
+ if index < start:
907
+ continue
908
+ elif index >= end:
909
+ continue
910
+
911
+ new_starts.append(new_start)
912
+ new_ends.append(index)
913
+
914
+ new_start = index + 1
915
+
916
+ new_starts.append(new_start)
917
+ new_ends.append(end)
918
+
919
+ return new_starts, new_ends
920
+
921
  def extract_sequences(self, numbers):
922
  if len(numbers) == 1:
923
  return [numbers[0]], [numbers[0]]
src/application/text/entity.py CHANGED
@@ -231,9 +231,9 @@ def apply_highlight(text, entities_with_colors, key="input", count=0):
231
 
232
  # Append highlighted text with index label
233
  highlighted_text += (
234
- f'\n<span_style="background-color:{entity_color};color:black;'
235
  f'border-radius:3px;font-size:14px;display:inline-block;">'
236
- f"{index_label}{temp_text[start:end]}</span>\n"
237
  )
238
  prev_end = end
239
  highlighted_text += temp_text[prev_end:]
 
231
 
232
  # Append highlighted text with index label
233
  highlighted_text += (
234
+ f'<span_style="background-color:{entity_color};color:black;'
235
  f'border-radius:3px;font-size:14px;display:inline-block;">'
236
+ f"{index_label}{temp_text[start:end]}</span>"
237
  )
238
  prev_end = end
239
  highlighted_text += temp_text[prev_end:]
src/application/text/helper.py CHANGED
@@ -161,10 +161,10 @@ def extract_equal_text(text1, text2):
161
  if tag == "equal":
162
  equal_idx_1.append({"start": i1, "end": i2})
163
  equal_idx_2.append({"start": j1, "end": j2})
164
- # subtext_1 = " ".join(text1[i1:i2])
165
- # subtext_2 = " ".join(text2[j1:j2])
166
- # print(f'{tag:7} a[{i1:2}:{i2:2}] --> b[{j1:2}:{j1:2}]
167
- # {subtext_1!r:>55} --> {subtext_2!r}')
168
  return equal_idx_1, equal_idx_2
169
 
170
 
 
161
  if tag == "equal":
162
  equal_idx_1.append({"start": i1, "end": i2})
163
  equal_idx_2.append({"start": j1, "end": j2})
164
+ subtext_1 = " ".join(text1[i1:i2])
165
+ subtext_2 = " ".join(text2[j1:j2])
166
+ # print(f'{tag:7} a[{i1:2}:{i2:2}] --> b[{j1:2}:{j2:2}] '
167
+ # f'{subtext_1!r:>55} --> {subtext_2!r}')
168
  return equal_idx_1, equal_idx_2
169
 
170
 
src/application/text/search_detection.py CHANGED
@@ -286,7 +286,7 @@ def check_paraphrase(input_text, page_text, url):
286
  "paraphrase": is_paraphrased,
287
  "url": url,
288
  }
289
- print(f"Result: [{alignment["similarity"]}] {alignment["source"]}")
290
 
291
  return alignment
292
 
 
286
  "paraphrase": is_paraphrased,
287
  "url": url,
288
  }
289
+ print(f'Result: [{alignment["similarity"]}] {alignment["source"]}')
290
 
291
  return alignment
292