from difflib import SequenceMatcher from src.application.image.image_detection import detect_image_by_ai_model, detect_image_by_reverse_search, detect_image_from_news_image from src.application.text.model_detection import detect_text_by_ai_model from src.application.text.search_detection import check_human, detect_text_by_relative_search class NewsVerification(): def __init__(self): self.news_text = "" self.news_title = "" self.news_content = "" self.news_image = "" self.text_prediction_label = "" self.text_prediction_score = -1 self.text_referent_url = None self.image_prediction_label = "" self.image_prediction_score = -1 self.image_referent_url = None self.news_prediction_label = "" self.news_prediction_score = -1 self.found_img_url = [] self.aligned_sentences = [] self.is_paraphrased = False def load_news(self, news_title, news_content, news_image): self.news_text = news_title + "\n\n" + news_content self.news_title = news_title self.news_content = news_content self.news_image = news_image def determine_text_origin(self): """ Determines the origin of the given text based on paraphrasing detection and human authorship analysis. Args: text: The input text to be analyzed. Returns: str: The predicted origin of the text: - "HUMAN": If the text is likely written by a human. - "MACHINE": If the text is likely generated by a machine. """ print("CHECK TEXT:") print("\tFrom search engine:") # Classify by search engine self.is_paraphrased, self.text_referent_url, self.aligned_sentences, self.found_img_url = detect_text_by_relative_search(self.news_text) if self.is_paraphrased is False: self.text_prediction_label = "UNKNOWN" else: self.text_prediction_score = 100 if check_human(self.aligned_sentences): self.text_prediction_label = "HUMAN" else: self.text_prediction_label = "MACHINE" # Classify text by AI model print("\tFrom AI model:") if self.text_prediction_label == "UNKNOWN": self.text_prediction_label, self.text_prediction_score = detect_text_by_ai_model(self.news_text) self.text_prediction_score *= 100 def detect_image_origin(self): print("CHECK IMAGE:") if self.news_image is None: self.image_prediction_label = "UNKNOWN" self.image_prediction_score = 0.0 self.image_referent_url = None return print(f"\t: Img path: {self.news_image}") matched_url, similarity = detect_image_from_news_image(self.news_image, self.found_img_url) if matched_url is not None: print(f"matching image: {matched_url}\nsimilarity: {similarity}\n") self.image_prediction_label = "HUMAN" self.image_prediction_score = similarity self.image_referent_url = matched_url return matched_url, similarity = detect_image_by_reverse_search(self.news_image) if matched_url is not None: print(f"matching image: {matched_url}\nsimilarity: {similarity}\n") self.image_prediction_label = "HUMAN" self.image_prediction_score = similarity self.image_referent_url = matched_url return detected_label, score = detect_image_by_ai_model(self.news_image) if detected_label: self.image_prediction_label = detected_label self.image_prediction_score = score self.image_referent_url = None return self.image_prediction_label = "UNKNOWN" self.image_prediction_score = 50 self.image_referent_url = None def determine_news_origin(self): if self.text_prediction_label == "MACHINE": text_prediction_score = 100 - self.text_prediction_score elif self.text_prediction_label == "UNKNOWN": text_prediction_score = 50 else: text_prediction_score = self.text_prediction_score if self.image_prediction_label == "MACHINE": image_prediction_score = 100 - self.image_prediction_score elif self.image_prediction_label == "UNKNOWN": image_prediction_score = 50 else: image_prediction_score = self.image_prediction_score news_prediction_score = (text_prediction_score + image_prediction_score) / 2 if news_prediction_score > 50: self.news_prediction_score = news_prediction_score self.news_prediction_label = "HUMAN" else: self.news_prediction_score = 100 - news_prediction_score self.news_prediction_label = "MACHINE" def generate_analysis_report(self): self.determine_text_origin() self.detect_image_origin() self.determine_news_origin() # Forensic analysis if self.text_prediction_label == "MACHINE": text_prediction_label = "The text is modified by GPT-4o (AI)" else: text_prediction_label = "The text is written by HUMAN" if self.image_prediction_label == "MACHINE": image_prediction_label = "The image is generated by Dall-e (AI)" else: image_prediction_label = "The image is generated by HUMAN" if self.news_prediction_label == "MACHINE": news_prediction_label = "The whole news generated by AI" else: news_prediction_label = "The whole news written by HUMAN" # Misinformation analysis out_of_context_results = "cohesive" if out_of_context_results == "cohesive": out_of_context_results = "The input news is cohesive (non-out-of-context)" else: out_of_context_results = "The input news is out-of-context" out_of_context_prediction_score = 96.7 # Description description = "The description should be concise, clear, and aimed at helping general readers understand the case." if self.text_referent_url is None: referred_news = "
Input sentence | Source sentence |
---|