Spaces:

pmkhanh7890
/

news_verification

Sleeping

File size: 3,650 Bytes

1ce1659



from src.application.text.model_detection import detect_by_ai_model
from src.application.text.search_detection import check_human, detect_by_relative_search


def determine_text_origin(title, content):
    """
    Classifies a news text as human- or machine-written.

    The title and content are joined into one string and passed to the
    search-based detector first. The AI-model detector is consulted only
    when the search-based detector does not flag the text as paraphrased.

    Args:
        title: The news title (concatenated with ``content``, so presumably a str).
        content: The news body (presumably a str).

    Returns:
        tuple: ``(label, score, referent_url)`` where
             - label: "HUMAN" or "MACHINE" when decided by the search path;
               otherwise whatever label ``detect_by_ai_model`` returns.
             - score: 100.0 when decided by the search path; otherwise the
               score returned by ``detect_by_ai_model``.
             - referent_url: The second value returned by
               ``detect_by_relative_search``, passed through unchanged.
    """
    full_text = title + "\n\n" + content

    # Step 1: search-engine based classification.
    paraphrased, source_url, matched_sentences = detect_by_relative_search(full_text)

    if paraphrased:
        # The search path reports a fixed score of 100.0.
        label = "HUMAN" if check_human(matched_sentences) else "MACHINE"
        return label, 100.0, source_url

    # Step 2: search was inconclusive, so fall back to the model-based detector.
    label, score = detect_by_ai_model(full_text)
    return label, score, source_url


def generate_analysis_report(news_title, news_content, news_image):
    """
    Builds an HTML fragment summarising the analysis of a news item.

    Only the text-origin result is computed (via ``determine_text_origin``).
    The image, whole-news and out-of-context results, their confidence
    scores, and the description are hard-coded placeholder values.

    Args:
        news_title: The news title, forwarded to ``determine_text_origin``.
        news_content: The news body, forwarded to ``determine_text_origin``.
        news_image: Currently unused; kept so the signature stays stable.

    Returns:
        str: An HTML fragment with "Originality", "Forensic",
             "Misinformation" and "Description" sections.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    text_prediction_label, text_confidence_score, text_referent_url = determine_text_origin(news_title, news_content)

    # Originality: link to the reference source, if one was found.
    max_link_chars = 40
    if text_referent_url:
        url1 = str(text_referent_url)
        # Append the ellipsis only when the URL is actually truncated.
        if len(url1) > max_link_chars:
            link_text = url1[:max_link_chars] + "..."
        else:
            link_text = url1
        # The URL comes from an external search, so escape it before
        # placing it in an attribute and in the link text.
        originality_item = (
            f'<a href="{escape(url1)}" target="_blank" rel="noopener noreferrer">'
            f"{escape(link_text)}</a>"
        )
    else:
        # No reference URL: avoid slicing None and render a plain message.
        originality_item = "No reference source found"

    # Forensic analysis
    # Any label other than "MACHINE" is reported as human-written.
    if text_prediction_label == "MACHINE":
        text_prediction_label = "The text is modified by GPT-4o (AI)"
    else:
        text_prediction_label = "The text is written by HUMAN"

    # Placeholder: image detection is hard-coded.
    image_detection_results = "MACHINE"
    if image_detection_results == "MACHINE":
        image_detection_results = "The image is generated by Dall-e (AI)"
    else:
        image_detection_results = "The image is generated by HUMAN"
    image_confidence_score = 90.5

    # Placeholder: whole-news detection is hard-coded.
    news_detection_results = "MACHINE"
    if news_detection_results == "MACHINE":
        news_detection_results = "The whole news generated by AI"
    else:
        news_detection_results = "The whole news written by HUMAN"
    news_confidence_score = 97.4

    # Misinformation analysis
    # Placeholder: out-of-context detection is hard-coded.
    out_of_context_results = "cohesive"
    if out_of_context_results == "cohesive":
        out_of_context_results = "The input news is cohesive (non-out-of-context)"
    else:
        out_of_context_results = "The input news is out-of-context"
    out_of_context_confidence_score = 96.7

    # Description
    description = "The description should be concise, clear, and aimed at helping general readers understand the case."

    # out_of_context_results already holds the full sentence, so the
    # template must not prepend "The input news is" a second time.
    html_template = f"""
    <h2>Placeholder for results</h2>
    
    <div>
        <h3>Originality:</h3>
        <ul>
            <li>{originality_item}</li>
        </ul>
    </div>

    <div>
        <h3>Forensic:</h3>
        <b>{news_detection_results} (confidence = {news_confidence_score}%)</b>
        <ul>
            <li>{text_prediction_label} (confidence = {text_confidence_score}%)</li>
            <li>{image_detection_results} (confidence = {image_confidence_score}%)</li>
        </ul>
    </div>

    <div>
        <h3>Misinformation:</h3>
        <ul>
            <li>{out_of_context_results} (confidence = {out_of_context_confidence_score}%)</li>
        </ul>
    </div>

    <div>
        <h3>Description (optional):</h3>
        <ul>
            <li>{description}</li>
        </ul>
    </div>
    """

    return html_template