Spaces:

pmkhanh7890
/

news_verification

Sleeping

App Files Files

news_verification / src /application /content_detection.py

pmkhanh7890

1st version of demo

1ce1659 5 months ago

raw

history blame

3.65 kB



	from src.application.text.model_detection import detect_by_ai_model
	from src.application.text.search_detection import check_human, detect_by_relative_search


	def determine_text_origin(title, content):
	    """
	    Classify a news text as human-written or machine-generated.

	    The title and content are joined with a blank line between them and
	    handed to the search-based check first. When that check reports the
	    text as paraphrased, `check_human` on the aligned sentences picks the
	    label and the score is fixed at 100.0. Otherwise both the label and
	    the score come from `detect_by_ai_model`.

	    Args:
	        title: Headline of the news item (concatenated as a string).
	        content: Body of the news item (concatenated as a string).

	    Returns:
	        tuple: `(label, score, referent_url)` where
	            - label is "HUMAN" or "MACHINE" on the search path, or
	              whatever `detect_by_ai_model` returns on the fallback path;
	            - score is 100.0 on the search path, otherwise the score
	              returned by `detect_by_ai_model`;
	            - referent_url is the second value returned by
	              `detect_by_relative_search`, passed through unchanged.
	    """
	    full_text = title + "\n\n" + content

	    # Step 1: search-engine based classification.
	    is_paraphrased, referent_url, aligned_sentences = detect_by_relative_search(full_text)

	    if not is_paraphrased:
	        # Step 2: no paraphrase evidence, so defer to the detection model.
	        label, score = detect_by_ai_model(full_text)
	        return label, score, referent_url

	    label = "HUMAN" if check_human(aligned_sentences) else "MACHINE"
	    return label, 100.0, referent_url


	def _format_referent_link(url, max_chars=40):
	    """Return the `<li>` HTML fragment that presents the referent URL.

	    Args:
	        url: The referent URL, or a falsy value (None / "") when there is none.
	        max_chars: Maximum number of URL characters shown as link text
	            before the text is truncated with "...".

	    Returns:
	        str: An HTML list item. The URL is HTML-escaped both in the `href`
	        attribute and in the visible text.
	    """
	    import html

	    if not url:
	        # Slicing None would raise TypeError, so render a plain note instead.
	        return "<li>No matching source found</li>"

	    url = str(url)
	    # Only add the ellipsis when something was actually cut off.
	    shown = url if len(url) <= max_chars else url[:max_chars] + "..."
	    return (
	        f'<li><a href="{html.escape(url, quote=True)}" target="_blank">'
	        f"{html.escape(shown)}</a></li>"
	    )


	def generate_analysis_report(news_title, news_content, news_image):
	    """Build the HTML analysis report for a news item.

	    Only the text origin is actually computed (via `determine_text_origin`).
	    The image, whole-news and out-of-context results, their confidence
	    scores, and the description are hard-coded placeholder values in this
	    function.

	    Args:
	        news_title: Title of the news item, forwarded to `determine_text_origin`.
	        news_content: Body of the news item, forwarded to `determine_text_origin`.
	        news_image: Currently unused; kept so the signature stays stable.

	    Returns:
	        str: An HTML fragment with Originality, Forensic, Misinformation and
	        Description sections.
	    """
	    text_label, text_confidence_score, text_referent_url = determine_text_origin(
	        news_title, news_content
	    )

	    # Originality: link to the referent source (handles a missing URL safely).
	    referent_item = _format_referent_link(text_referent_url)

	    # Forensic analysis: any label other than "MACHINE" is reported as human.
	    if text_label == "MACHINE":
	        text_prediction_label = "The text is modified by GPT-4o (AI)"
	    else:
	        text_prediction_label = "The text is written by HUMAN"

	    # Placeholder: image detection is not wired in; the raw label is fixed.
	    image_raw_label = "MACHINE"
	    image_detection_results = (
	        "The image is generated by Dall-e (AI)"
	        if image_raw_label == "MACHINE"
	        else "The image is generated by HUMAN"
	    )
	    image_confidence_score = 90.5

	    # Placeholder: whole-news verdict is fixed as well.
	    news_raw_label = "MACHINE"
	    news_detection_results = (
	        "The whole news generated by AI"
	        if news_raw_label == "MACHINE"
	        else "The whole news written by HUMAN"
	    )
	    news_confidence_score = 97.4

	    # Misinformation analysis (placeholder). The message is already a full
	    # sentence, so the template must not prepend "The input news is" again.
	    out_of_context_raw = "cohesive"
	    out_of_context_results = (
	        "The input news is cohesive (non-out-of-context)"
	        if out_of_context_raw == "cohesive"
	        else "The input news is out-of-context"
	    )
	    out_of_context_confidence_score = 96.7

	    # Description (placeholder text).
	    description = "The description should be concise, clear, and aimed at helping general readers understand the case."

	    html_template = f"""
	    <h2>Placeholder for results</h2>

	    <div>
	        <h3>Originality:</h3>
	        <ul>
	            {referent_item}
	        </ul>
	    </div>

	    <div>
	        <h3>Forensic:</h3>
	        <b>{news_detection_results} (confidence = {news_confidence_score}%)</b>
	        <ul>
	            <li>{text_prediction_label} (confidence = {text_confidence_score}%)</li>
	            <li>{image_detection_results} (confidence = {image_confidence_score}%)</li>
	        </ul>
	    </div>

	    <div>
	        <h3>Misinformation:</h3>
	        <ul>
	            <li>{out_of_context_results} (confidence = {out_of_context_confidence_score}%)</li>
	        </ul>
	    </div>

	    <div>
	        <h3>Description (optional):</h3>
	        <ul>
	            <li>{description}</li>
	        </ul>
	    </div>
	    """

	    return html_template