Spaces:
Sleeping
Sleeping
from src.application.text.model_detection import detect_by_ai_model | |
from src.application.text.search_detection import check_human, detect_by_relative_search | |
def determine_text_origin(title, content):
    """
    Classify a news text as human- or machine-written.

    The title and content are joined into one text, which is first run
    through the search-based paraphrase detector. If that reports a
    paraphrase, the verdict comes from check_human() on the aligned
    sentences and the score is fixed at 100.0. Otherwise the label and
    score are whatever detect_by_ai_model() returns for the same text.

    Args:
        title: Title of the news item (concatenated with ``content``).
        content: Body of the news item.

    Returns:
        tuple: ``(prediction_label, prediction_score, referent_url)`` where
            - prediction_label is "HUMAN" or "MACHINE" on the search path,
              or the label produced by detect_by_ai_model() otherwise;
            - prediction_score is 100.0 on the search path, or the score
              produced by detect_by_ai_model() otherwise;
            - referent_url is the second value returned by
              detect_by_relative_search(), passed through unchanged.
    """
    combined_text = title + "\n\n" + content

    # Stage 1: classify by search engine.
    paraphrased, source_url, matched_sentences = detect_by_relative_search(combined_text)
    if paraphrased:
        label = "HUMAN" if check_human(matched_sentences) else "MACHINE"
        return label, 100.0, source_url

    # Stage 2: no paraphrase found, so fall back to the SOTA model.
    label, score = detect_by_ai_model(combined_text)
    return label, score, source_url
def generate_analysis_report(news_title, news_content, news_image):
    """
    Build an HTML fragment summarising the analysis of a news item.

    Only the text verdict is computed (via determine_text_origin). The
    image, whole-news and out-of-context verdicts, their confidence scores
    and the description are hard-coded placeholder values.

    Args:
        news_title: Title of the news item; forwarded to determine_text_origin.
        news_content: Body of the news item; forwarded to determine_text_origin.
        news_image: Accepted for interface compatibility; currently unused.

    Returns:
        str: HTML fragment with "Originality", "Forensic", "Misinformation"
        and "Description (optional)" sections.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from html import escape

    text_label, text_confidence_score, referent_url = determine_text_origin(
        news_title, news_content
    )

    # --- Originality ---
    # The referent URL may be missing (None/empty); slicing it unconditionally
    # would raise TypeError or render an empty link.
    if referent_url:
        url = str(referent_url)
        # Only add an ellipsis when the URL was actually truncated.
        shown_url = url if len(url) <= 40 else url[:40] + "..."
        # The URL comes from an external search result, so escape it before
        # placing it in an attribute and in text.
        # NOTE(review): escaping does not restrict the URL scheme - confirm
        # upstream only yields http(s) links.
        originality_item = (
            f'<a href="{escape(url)}" target="_blank">{escape(shown_url)}</a>'
        )
    else:
        originality_item = "No referent URL found"

    # --- Forensic analysis ---
    # NOTE(review): any label other than "MACHINE" is reported as human-written;
    # confirm the detectors cannot return another label such as "UNKNOWN".
    if text_label == "MACHINE":
        text_result = "The text is modified by GPT-4o (AI)"
    else:
        text_result = "The text is written by HUMAN"

    # Placeholder verdicts: the image and whole-news detectors are not wired in.
    image_verdict = "MACHINE"
    image_result = (
        "The image is generated by Dall-e (AI)"
        if image_verdict == "MACHINE"
        else "The image is generated by HUMAN"
    )
    image_confidence_score = 90.5

    news_verdict = "MACHINE"
    news_result = (
        "The whole news generated by AI"
        if news_verdict == "MACHINE"
        else "The whole news written by HUMAN"
    )
    news_confidence_score = 97.4

    # --- Misinformation analysis (placeholder) ---
    context_verdict = "cohesive"
    context_result = (
        "The input news is cohesive (non-out-of-context)"
        if context_verdict == "cohesive"
        else "The input news is out-of-context"
    )
    out_of_context_confidence_score = 96.7

    # --- Description (placeholder) ---
    description = "The description should be concise, clear, and aimed at helping general readers understand the case."

    # context_result is already a full sentence, so the template must not
    # prepend "The input news is" again.
    html_template = f"""
    <h2>Placeholder for results</h2>
    <div>
        <h3>Originality:</h3>
        <ul>
            <li>{originality_item}</li>
        </ul>
    </div>
    <div>
        <h3>Forensic:</h3>
        <b>{news_result} (confidence = {news_confidence_score}%)</b>
        <ul>
            <li>{text_result} (confidence = {text_confidence_score}%)</li>
            <li>{image_result} (confidence = {image_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Misinformation:</h3>
        <ul>
            <li>{context_result} (confidence = {out_of_context_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Description (optional):</h3>
        <ul>
            <li>{description}</li>
        </ul>
    </div>
    """
    return html_template