Spaces:

pmkhanh7890
/

news_verification

Sleeping

App Files Files

pmkhanh7890 committed on Feb 21

Commit

0260491

1 Parent(s): 530452f

fix bug of colored text

Browse files

Files changed (5) hide show

application.py +3 -3
src/application/content_detection.py +39 -7
src/application/text/entity.py +2 -2
src/application/text/helper.py +4 -4
src/application/text/search_detection.py +1 -1

application.py CHANGED Viewed

@@ -125,7 +125,7 @@ FOR GOVERNOR<br>
 - Each highlighted pair (marked with a number) shows the key differences
 between the input text and the source.
         """
-        table = """
 <h5>Comparison between input news and source news:</h5>
     <table border="1" style="width:100%; text-align:left;">
     <col style="width: 170px;">
@@ -144,7 +144,7 @@ between the input text and the source.
             <tr>
                 <td style="border-bottom: 1px solid transparent";>TBD</td>
                 <td style="border-bottom: 1px solid transparent";>TBD</td>
-                <td rowspan="2">TBD</td>
                 <td rowspan="2">TBD</td>
             </tr>
             <tr>
@@ -164,7 +164,7 @@ between the input text and the source.
         with gr.Column(scale=2):
             with gr.Accordion("NEWS ANALYSIS"):
                 verification_button = gr.Button("Verify news")
-                with gr.Tab("Orinary User"):
                     gr.HTML(ordinary_user_explanation)
                     ordinary_user_result = gr.HTML(table)
                 with gr.Tab("Fact Checker"):

 - Each highlighted pair (marked with a number) shows the key differences
 between the input text and the source.
         """
+        table = f"""
 <h5>Comparison between input news and source news:</h5>
     <table border="1" style="width:100%; text-align:left;">
     <col style="width: 170px;">
             <tr>
                 <td style="border-bottom: 1px solid transparent";>TBD</td>
                 <td style="border-bottom: 1px solid transparent";>TBD</td>
+                <td rowspan="2"> <img src="https://huggingface.co/spaces/pmkhanh7890/news_verification/resolve/main/examples/example_image_input.jpg" alt="A picture of a cat."></td>
                 <td rowspan="2">TBD</td>
             </tr>
             <tr>
         with gr.Column(scale=2):
             with gr.Accordion("NEWS ANALYSIS"):
                 verification_button = gr.Button("Verify news")
+                with gr.Tab("Ordinary User"):
                     gr.HTML(ordinary_user_explanation)
                     ordinary_user_result = gr.HTML(table)
                 with gr.Tab("Fact Checker"):

src/application/content_detection.py CHANGED Viewed

@@ -409,7 +409,6 @@ class NewsVerification:
                 first_url_row,
                 last_url_row,
                 span_row,
-                max_length,
             )
             rows.append(formatted_row)
@@ -443,7 +442,6 @@ class NewsVerification:
         first_url_row=True,
         last_url_row=True,
         span_row=1,
-        max_length=30,
     ):
         entity_count = 0
         if row[0]["input"] is None:
@@ -850,12 +848,12 @@ class NewsVerification:
         Examples:
             starts = [0, 5, 10]
-            ends = [3, 7, 12]
-            ignore_indices = [1, 2, 11, 17]
             # Output:
-                starts = [0, 3, 5, 10, 12]
-                ends = [0, 3, 7, 10, 12]
         """
@@ -880,12 +878,46 @@ class NewsVerification:
             start_end = list(range(start, end + 1, 1))
             start_end = list(set(start_end) - set(ignore_indices))
-            new_start, new_end = self.extract_sequences(start_end)
             filtered_starts.extend(new_start)
             filtered_ends.extend(new_end)
         return filtered_starts, filtered_ends
     def extract_sequences(self, numbers):
         if len(numbers) == 1:
             return [numbers[0]], [numbers[0]]

                 first_url_row,
                 last_url_row,
                 span_row,
             )
             rows.append(formatted_row)
         first_url_row=True,
         last_url_row=True,
         span_row=1,
     ):
         entity_count = 0
         if row[0]["input"] is None:
         Examples:
             starts = [0, 5, 10]
+            ends = [3, 7, 12]  # words at the end will not be colored.
+            ignore_indices = [1, 2, 12, 17]
             # Output:
+                starts = [0, 3, 5, 10]
+                ends = [1, 4, 7, 12]
         """
             start_end = list(range(start, end + 1, 1))
             start_end = list(set(start_end) - set(ignore_indices))
+            #new_start, new_end = self.extract_sequences(start_end)
+            new_start, new_end = self.extract_new_startend(
+                start,
+                end,
+                ignore_indices
+            )
             filtered_starts.extend(new_start)
             filtered_ends.extend(new_end)
         return filtered_starts, filtered_ends
+    def extract_new_startend(self, start, end, ignore_indices):
+        # sort a set of ignore_indices
+        indexes = list(set(ignore_indices))
+        indexes.sort()
+        new_starts = []
+        new_ends = []
+        new_start = start
+        if indexes is None or len(indexes) < 1:
+            new_starts.append(start)
+            new_ends.append(end)
+            return new_starts, new_ends
+        for index in indexes:
+            if index < start:
+                continue
+            elif index >= end:
+                continue
+            new_starts.append(new_start)
+            new_ends.append(index)
+            new_start = index + 1
+        new_starts.append(new_start)
+        new_ends.append(end)
+        return new_starts, new_ends
     def extract_sequences(self, numbers):
         if len(numbers) == 1:
             return [numbers[0]], [numbers[0]]

src/application/text/entity.py CHANGED Viewed

@@ -231,9 +231,9 @@ def apply_highlight(text, entities_with_colors, key="input", count=0):
             # Append highlighted text with index label
             highlighted_text += (
-                f'\n<span_style="background-color:{entity_color};color:black;'
                 f'border-radius:3px;font-size:14px;display:inline-block;">'
-                f"{index_label}{temp_text[start:end]}</span>\n"
             )
             prev_end = end
         highlighted_text += temp_text[prev_end:]

             # Append highlighted text with index label
             highlighted_text += (
+                f'<span_style="background-color:{entity_color};color:black;'
                 f'border-radius:3px;font-size:14px;display:inline-block;">'
+                f"{index_label}{temp_text[start:end]}</span>"
             )
             prev_end = end
         highlighted_text += temp_text[prev_end:]

src/application/text/helper.py CHANGED Viewed

@@ -161,10 +161,10 @@ def extract_equal_text(text1, text2):
         if tag == "equal":
             equal_idx_1.append({"start": i1, "end": i2})
             equal_idx_2.append({"start": j1, "end": j2})
-            # subtext_1 = " ".join(text1[i1:i2])
-            # subtext_2 = " ".join(text2[j1:j2])
-            # print(f'{tag:7}   a[{i1:2}:{i2:2}] --> b[{j1:2}:{j1:2}]
-            # {subtext_1!r:>55} --> {subtext_2!r}')
     return equal_idx_1, equal_idx_2

         if tag == "equal":
             equal_idx_1.append({"start": i1, "end": i2})
             equal_idx_2.append({"start": j1, "end": j2})
+            subtext_1 = " ".join(text1[i1:i2])
+            subtext_2 = " ".join(text2[j1:j2])
+            # print(f'{tag:7}   a[{i1:2}:{i2:2}] --> b[{j1:2}:{j2:2}] '
+            #       f'{subtext_1!r:>55} --> {subtext_2!r}')
     return equal_idx_1, equal_idx_2

src/application/text/search_detection.py CHANGED Viewed

@@ -286,7 +286,7 @@ def check_paraphrase(input_text, page_text, url):
             "paraphrase": is_paraphrased,
             "url": url,
         }
-        print(f"Result: [{alignment["similarity"]}] {alignment["source"]}")
     return alignment

             "paraphrase": is_paraphrased,
             "url": url,
         }
+        print(f'Result: [{alignment["similarity"]}] {alignment["source"]}')
     return alignment