Spaces:
Sleeping
Sleeping
File size: 3,650 Bytes
1ce1659 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from src.application.text.model_detection import detect_by_ai_model
from src.application.text.search_detection import check_human, detect_by_relative_search
def determine_text_origin(title, content):
    """
    Classify a piece of news text as human- or machine-written.

    The title and content are joined with a blank line and first passed to
    ``detect_by_relative_search``. When that search reports the text as
    paraphrased, the label is decided by ``check_human`` on the aligned
    sentences and the score is fixed at 100.0. Otherwise the label and
    score are whatever ``detect_by_ai_model`` returns for the same text.

    Args:
        title: Title of the text to analyse.
        content: Body of the text to analyse.

    Returns:
        tuple: ``(prediction_label, prediction_score, referent_url)`` where
        ``referent_url`` is the second value returned by
        ``detect_by_relative_search``.
    """
    full_text = title + "\n\n" + content

    # Stage 1: search-based detection.
    found_match, referent_url, matched_sentences = detect_by_relative_search(full_text)
    if found_match:
        label = "HUMAN" if check_human(matched_sentences) else "MACHINE"
        return label, 100.0, referent_url

    # Stage 2: no match from the search, so defer to the model-based detector.
    label, score = detect_by_ai_model(full_text)
    return label, score, referent_url
def generate_analysis_report(news_title, news_content, news_image):
    """
    Build an HTML report summarising the analysis of a news item.

    Only the text verdict is computed (via ``determine_text_origin``). The
    image, whole-news and out-of-context verdicts, their confidence scores
    and the description are hard-coded placeholders, and ``news_image`` is
    not used yet.

    Args:
        news_title: Title of the news item.
        news_content: Body text of the news item.
        news_image: Image attached to the news item (currently unused).

    Returns:
        str: An HTML fragment with Originality, Forensic, Misinformation
        and Description sections.
    """
    # Local import so the block does not depend on a file-level import.
    from html import escape

    text_prediction_label, text_confidence_score, text_referent_url = (
        determine_text_origin(news_title, news_content)
    )

    # Originality: link to the referent URL if there is one. The URL comes
    # from an external search, so it is escaped before being put into HTML.
    max_url_chars = 40
    if text_referent_url:
        full_url = str(text_referent_url)
        # Only add an ellipsis when the URL is actually truncated.
        if len(full_url) > max_url_chars:
            shown_url = full_url[:max_url_chars] + "..."
        else:
            shown_url = full_url
        originality_item = (
            f'<a href="{escape(full_url)}" target="_blank">{escape(shown_url)}</a>'
        )
    else:
        # NOTE(review): assumes the search may yield no URL (None or "");
        # confirm against detect_by_relative_search.
        originality_item = "No reference URL found"

    # Forensic analysis: any label other than "MACHINE" is reported as human.
    if text_prediction_label == "MACHINE":
        text_prediction_label = "The text is modified by GPT-4o (AI)"
    else:
        text_prediction_label = "The text is written by HUMAN"

    # Placeholder image verdict (no image detector is called here).
    image_detection_results = "MACHINE"
    if image_detection_results == "MACHINE":
        image_detection_results = "The image is generated by Dall-e (AI)"
    else:
        image_detection_results = "The image is generated by HUMAN"
    image_confidence_score = 90.5

    # Placeholder whole-news verdict.
    news_detection_results = "MACHINE"
    if news_detection_results == "MACHINE":
        news_detection_results = "The whole news generated by AI"
    else:
        news_detection_results = "The whole news written by HUMAN"
    news_confidence_score = 97.4

    # Misinformation analysis (placeholder verdict).
    out_of_context_results = "cohesive"
    if out_of_context_results == "cohesive":
        out_of_context_results = "The input news is cohesive (non-out-of-context)"
    else:
        out_of_context_results = "The input news is out-of-context"
    out_of_context_confidence_score = 96.7

    # Description (placeholder text).
    description = "The description should be concise, clear, and aimed at helping general readers understand the case."

    # The out-of-context sentence already starts with "The input news is",
    # so the template must not prepend that phrase again.
    html_template = f"""
    <h2>Placeholder for results</h2>
    <div>
        <h3>Originality:</h3>
        <ul>
            <li>{originality_item}</li>
        </ul>
    </div>
    <div>
        <h3>Forensic:</h3>
        <b>{news_detection_results} (confidence = {news_confidence_score}%)</b>
        <ul>
            <li>{text_prediction_label} (confidence = {text_confidence_score}%)</li>
            <li>{image_detection_results} (confidence = {image_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Misinformation:</h3>
        <ul>
            <li>{out_of_context_results} (confidence = {out_of_context_confidence_score}%)</li>
        </ul>
    </div>
    <div>
        <h3>Description (optional):</h3>
        <ul>
            <li>{description}</li>
        </ul>
    </div>
    """
    return html_template