news_verification / src /application /content_detection.py
pmkhanh7890's picture
1st version of demo
1ce1659
raw
history blame
3.65 kB
from src.application.text.model_detection import detect_by_ai_model
from src.application.text.search_detection import check_human, detect_by_relative_search
def determine_text_origin(title, content):
    """
    Classify a news text as human- or machine-written.

    The title and content are joined (separated by a blank line) and first
    passed to ``detect_by_relative_search``. When that call reports the text
    as paraphrased, ``check_human`` is applied to the aligned sentences to
    pick between "HUMAN" and "MACHINE", and the score is fixed at 100.0.
    Otherwise the label and score come from ``detect_by_ai_model``.

    Args:
        title: Title of the news item (concatenated with ``content``).
        content: Body of the news item.

    Returns:
        tuple: ``(prediction_label, prediction_score, referent_url)`` where
            - prediction_label is "HUMAN" or "MACHINE" on the search path,
              or whatever label ``detect_by_ai_model`` returns;
            - prediction_score is 100.0 on the search path, or the score
              returned by ``detect_by_ai_model``;
            - referent_url is the second value returned by
              ``detect_by_relative_search``, passed through unchanged.
    """
    full_text = title + "\n\n" + content

    # First pass: search-based detection.
    paraphrased, referent_url, aligned_sentences = detect_by_relative_search(full_text)

    if paraphrased:
        label = "HUMAN" if check_human(aligned_sentences) else "MACHINE"
        return label, 100.0, referent_url

    # Search was inconclusive: fall back to the model-based detector.
    label, score = detect_by_ai_model(full_text)
    return label, score, referent_url
def generate_analysis_report(news_title, news_content, news_image):
    """
    Build the HTML analysis report for a news item.

    Only the text analysis is computed (via ``determine_text_origin``); the
    image, whole-news and out-of-context results, their confidence scores,
    and the description are hard-coded placeholder values.

    Args:
        news_title: Title of the news item, forwarded to
            ``determine_text_origin``.
        news_content: Body of the news item, forwarded to
            ``determine_text_origin``.
        news_image: Currently unused; kept so the signature stays stable.

    Returns:
        str: An HTML fragment with the Originality, Forensic, Misinformation
        and Description sections.
    """
    # Local import keeps this block self-contained.
    import html

    text_prediction_label, text_confidence_score, text_referent_url = determine_text_origin(
        news_title, news_content
    )

    # Originality: render the referent URL as a link. The URL originates
    # outside this module, so it is escaped before being placed in the markup.
    # NOTE(review): the URL is presumably None/empty when no source was
    # matched -- guard so the report still renders instead of raising.
    if text_referent_url:
        url = str(text_referent_url)
        # Only add the ellipsis when the URL was actually truncated.
        shown_url = url if len(url) <= 40 else url[:40] + "..."
        originality_item = (
            f'<a href="{html.escape(url, quote=True)}" target="_blank">'
            f"{html.escape(shown_url)}</a>"
        )
    else:
        originality_item = "No reference source found"

    # Forensic analysis
    if text_prediction_label == "MACHINE":
        text_prediction_label = "The text is modified by GPT-4o (AI)"
    else:
        text_prediction_label = "The text is written by HUMAN"

    # Placeholder: image detection is not wired in yet.
    image_detection_results = "MACHINE"
    if image_detection_results == "MACHINE":
        image_detection_results = "The image is generated by Dall-e (AI)"
    else:
        image_detection_results = "The image is generated by HUMAN"
    image_confidence_score = 90.5

    # Placeholder: whole-news detection is not wired in yet.
    news_detection_results = "MACHINE"
    if news_detection_results == "MACHINE":
        news_detection_results = "The whole news generated by AI"
    else:
        news_detection_results = "The whole news written by HUMAN"
    news_confidence_score = 97.4

    # Misinformation analysis (placeholder result).
    out_of_context_results = "cohesive"
    if out_of_context_results == "cohesive":
        out_of_context_results = "The input news is cohesive (non-out-of-context)"
    else:
        out_of_context_results = "The input news is out-of-context"
    out_of_context_confidence_score = 96.7

    # Description
    description = "The description should be concise, clear, and aimed at helping general readers understand the case."

    # out_of_context_results is already a full sentence, so it is emitted
    # as-is (no extra "The input news is" prefix).
    html_template = f"""
    <h2>Placeholder for results</h2>
    <div>
        <h3>Originality:</h3>
        <ul>
            <li>{originality_item}</li>
        </ul>
    </div>
    <div>
        <h3>Forensic:</h3>
        <b>{news_detection_results} (confidence = {news_confidence_score}%)</b>
        <ul>
            <li>{text_prediction_label} (confidence = {text_confidence_score}%)</li>
            <li>{image_detection_results} (confidence = {image_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Misinformation:</h3>
        <ul>
            <li>{out_of_context_results} (confidence = {out_of_context_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Description (optional):</h3>
        <ul>
            <li>{description}</li>
        </ul>
    </div>
    """
    return html_template