# Spaces: Perfect7613 / Factchecker (Sleeping)
# File size: 10,100 Bytes
# e268dcd

import gradio as gr
import google.generativeai as genai
from datetime import datetime
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
import requests
import json
import os
from dotenv import load_dotenv

@dataclass
class Source:
    """A web page consulted while checking a claim.

    Instances are plain records: they carry the page's location and text
    together with the score used to weight evidence taken from the page.
    """

    # Address of the page.
    url: str
    # Human-readable page title.
    title: str
    # Text extracted from the page.
    content: str
    # Weight applied to evidence drawn from this source.
    reputation_score: float

@dataclass
class FactCheckResult:
    """The outcome of checking one claim.

    Bundles the verdict and its confidence with the sources that were read
    and the evidence items that were sorted into supporting and
    contradicting groups.
    """

    # The claim text that was checked.
    claim: str
    # Verdict label, e.g. "Likely True" or "Inconclusive".
    verdict: str
    # Numeric confidence attached to the verdict.
    confidence_score: float
    # Timestamp of the analysis, stored as a formatted string.
    analysis_date: str
    # Sources whose content was analysed.
    sources: List[Source]
    # Evidence items classified as supporting the claim.
    evidence: List[Dict]
    # Evidence items classified as contradicting the claim.
    contradictions: List[Dict]
    # Short human-readable justification for the verdict.
    explanation: str

class GeminiFactChecker:
    """Fact-check a claim by searching the web and asking Gemini to weigh evidence.

    The pipeline is: Google Custom Search for candidate URLs, the Jina reader
    for page text, one Gemini call per page to extract supporting or
    contradicting evidence, then a weighted vote to produce a verdict.

    Failures in individual steps are printed and treated as "no data" rather
    than raised, so ``check_fact`` returns ``None`` instead of raising.
    """

    # Seconds to wait for the search and reader HTTP requests.
    REQUEST_TIMEOUT = 10
    # Reputation assigned to every source; no per-site scoring is implemented.
    DEFAULT_REPUTATION = 0.8

    def __init__(self):
        """Configure Gemini and read service credentials from the environment.

        Raises:
            ValueError: if ``GOOGLE_API_KEY`` is not set.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable is required")

        genai.configure(api_key=api_key)
        generation_config = genai.types.GenerationConfig(
            temperature=0.1,
            top_p=0.8,
            top_k=40,
        )

        self.model = genai.GenerativeModel(
            model_name='gemini-1.5-pro',
            generation_config=generation_config
        )
        self.search_api_key = os.getenv("SEARCH_API_KEY")
        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
        self.jinai_reader_url = "https://r.jina.ai/"

    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
        """Return result URLs for ``claim`` from Google Custom Search.

        Args:
            claim: Text used as the search query.
            num_sources: Number of results to request; clamped to 1..10,
                the range the Custom Search JSON API accepts for ``num``.

        Returns:
            A list of result links, or an empty list if the request fails.
        """
        try:
            search_url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.search_api_key,
                'cx': self.search_engine_id,
                'q': claim,
                'num': max(1, min(int(num_sources), 10)),
            }
            # A timeout keeps a stalled request from hanging the UI forever.
            response = requests.get(
                search_url, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            items = response.json().get('items', [])
            # Skip malformed results instead of discarding the whole page.
            return [item['link'] for item in items if item.get('link')]
        except Exception as e:
            print(f"Error searching sources: {str(e)}")
            return []

    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
        """Fetch the readable text of ``url`` through the Jina reader.

        Returns:
            A dict with ``content`` (at most 5000 characters), ``title`` and
            the raw ``data`` payload, or ``None`` if the request fails or the
            response carries no data.
        """
        try:
            headers = {'Accept': 'application/json'}
            # Only send credentials when a key is configured; otherwise the
            # header would literally read "Bearer None".
            if self.jinai_api_key:
                headers['Authorization'] = f'Bearer {self.jinai_api_key}'

            # The base URL already ends with "/"; avoid producing "//".
            reader_url = f"{self.jinai_reader_url.rstrip('/')}/{url}"
            response = requests.get(
                reader_url, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            payload = response.json().get('data')
            if not payload:
                return None

            return {
                # "or ''" guards against explicit nulls in the payload.
                "content": (payload.get('content') or '')[:5000],
                "title": payload.get('title') or '',
                "data": payload
            }
        except Exception as e:
            print(f"Error fetching {url}: {str(e)}")
            return None

    @staticmethod
    def _parse_evidence_json(raw_text: str) -> List[Dict]:
        """Extract the list of evidence objects from a model reply.

        The reply may wrap the array in a Markdown code fence (with or
        without a ``json`` tag) or surround it with prose, so parsing starts
        at the first ``[`` and ignores anything after the array ends.

        Returns:
            The dict items of the parsed array; non-dict items are dropped.

        Raises:
            ValueError: if the reply contains no JSON array
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        start = raw_text.find('[')
        if start == -1:
            raise ValueError("No JSON array found in model response")
        parsed, _ = json.JSONDecoder().raw_decode(raw_text, start)
        if not isinstance(parsed, list):
            raise ValueError("Model response is not a JSON array")
        return [item for item in parsed if isinstance(item, dict)]

    def _analyze_evidence(self, claim: str, sources: List[Source]) -> List[Dict]:
        """Ask Gemini to extract evidence about ``claim`` from each source.

        Each returned evidence dict is tagged with ``source_score``, the
        reputation score of the source it came from. Sources whose analysis
        fails are reported with ``print`` and skipped.
        """
        all_evidence = []

        for source in sources:
            prompt = f"""
            Analyze this content and return evidence as JSON array:
            
            CLAIM: "{claim}"
            SOURCE TITLE: {source.title}
            CONTENT: {source.content[:2000]}
            
            Return array of evidence objects with properties:
            - text: exact quote or clear paraphrase
            - type: "supporting" or "contradicting"
            - relevance: number 0.0 to 1.0
            - source: source title
            """

            try:
                response = self.model.generate_content(prompt)
                if response.text:
                    for item in self._parse_evidence_json(response.text):
                        item["source_score"] = source.reputation_score
                        all_evidence.append(item)
            except Exception as e:
                print(f"Error analyzing source {source.url}: {str(e)}")
                continue

        return all_evidence

    @staticmethod
    def _as_float(value, default: float) -> float:
        """Convert ``value`` to float, returning ``default`` if it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _evidence_kind(item: Dict) -> str:
        """Return the evidence ``type`` lower-cased and stripped ('' if absent)."""
        return str(item.get("type") or "").strip().lower()

    @classmethod
    def _evidence_weight(cls, item: Dict) -> float:
        """Return relevance (clamped to 0..1) multiplied by the source score."""
        relevance = cls._as_float(item.get("relevance"), 0.5)
        relevance = max(0.0, min(relevance, 1.0))
        return relevance * cls._as_float(item.get("source_score"), 1.0)

    @classmethod
    def _derive_verdict(
        cls, evidence: List[Dict]
    ) -> Tuple[str, float, str, List[Dict], List[Dict]]:
        """Turn extracted evidence into a verdict.

        Returns:
            ``(verdict, confidence, explanation, supporting, contradicting)``.
            Confidence is 0.0 when there is nothing to weigh; otherwise it is
            the larger of the supporting and contradicting weight shares.
        """
        supporting = [e for e in evidence if cls._evidence_kind(e) == "supporting"]
        contradicting = [e for e in evidence if cls._evidence_kind(e) == "contradicting"]

        if not evidence:
            return (
                "Insufficient evidence",
                0.0,
                "No evidence found from analyzed sources.",
                supporting,
                contradicting,
            )

        total_support = sum(cls._evidence_weight(e) for e in supporting)
        total_contradiction = sum(cls._evidence_weight(e) for e in contradicting)
        total_weight = total_support + total_contradiction

        if total_weight <= 0:
            # Nothing classifiable: a ratio of 0 here would otherwise be
            # reported as "Likely False" with full confidence.
            return (
                "Insufficient evidence",
                0.0,
                "No usable supporting or contradicting evidence found in analyzed sources.",
                supporting,
                contradicting,
            )

        support_ratio = total_support / total_weight
        confidence = max(support_ratio, 1 - support_ratio)

        if support_ratio > 0.6:
            verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
        elif support_ratio < 0.4:
            verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
        else:
            verdict = "Inconclusive"

        explanation = f"Based on {len(supporting)} supporting and {len(contradicting)} contradicting pieces of evidence."
        return verdict, confidence, explanation, supporting, contradicting

    def check_fact(self, claim: str, num_sources: int = 3) -> Optional[FactCheckResult]:
        """Check ``claim`` against up to ``num_sources`` web sources.

        Returns:
            A ``FactCheckResult``, or ``None`` when the claim is blank, no
            source could be found or fetched, or an unexpected error occurs.
        """
        try:
            if not claim or not claim.strip():
                return None

            urls = self._search_sources(claim, num_sources)
            if not urls:
                return None

            sources = []
            for url in urls:
                content_dict = self._fetch_webpage_content(url)
                if content_dict:
                    sources.append(Source(
                        url=url,
                        # The "title" key is always present but may be
                        # empty, so fall back with "or" rather than a
                        # .get() default.
                        title=content_dict.get("title") or url,
                        content=content_dict["content"],
                        reputation_score=self.DEFAULT_REPUTATION
                    ))

            if not sources:
                return None

            evidence = self._analyze_evidence(claim, sources)
            verdict, confidence, explanation, supporting, contradicting = (
                self._derive_verdict(evidence)
            )

            return FactCheckResult(
                claim=claim,
                verdict=verdict,
                confidence_score=confidence,
                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                sources=sources,
                evidence=supporting,
                contradictions=contradicting,
                explanation=explanation
            )

        except Exception as e:
            print(f"Error during fact checking: {str(e)}")
            return None

def format_fact_check_report(result: FactCheckResult) -> str:
    """Render a fact-check result as a Markdown report.

    The report lists the claim, verdict, confidence, explanation, a summary
    of counts, every analysed source, and at most three items each of
    supporting and contradicting evidence.

    Evidence items are model-generated dicts, so a missing ``text`` or
    ``source`` key is rendered with a placeholder instead of raising
    ``KeyError`` and losing the whole report.

    Args:
        result: The fact-check outcome to render.

    Returns:
        The report as a Markdown string.
    """
    parts = [f"""# Fact Check Report

## Claim
"{result.claim}"

## Verdict: {result.verdict}
Confidence Score: {result.confidence_score:.2f}

## Explanation
{result.explanation}

## Analysis Summary
- Number of sources analyzed: {len(result.sources)}
- Supporting evidence found: {len(result.evidence)}
- Contradicting points found: {len(result.contradictions)}

## Sources Analyzed
"""]
    parts.extend(
        f"- [{source.title}]({source.url}) (Credibility: {source.reputation_score:.2f})\n"
        for source in result.sources
    )

    evidence_sections = (
        ("\n### Supporting Evidence:\n", result.evidence),
        ("\n### Contradicting Points:\n", result.contradictions),
    )
    for heading, items in evidence_sections:
        if not items:
            continue
        parts.append(heading)
        # Only the first three items of each kind are shown.
        parts.extend(
            f"- {item.get('text', '')} (Source: {item.get('source', 'Unknown')})\n"
            for item in items[:3]
        )

    return "".join(parts)

def main():
    """Build the Gradio interface for the fact checker and launch it.

    Environment variables are loaded from a ``.env`` file first, because
    ``GeminiFactChecker()`` raises ``ValueError`` when ``GOOGLE_API_KEY`` is
    not set.
    """
    load_dotenv()
    fact_checker = GeminiFactChecker()

    with gr.Blocks() as demo:
        gr.Markdown("# AI-Powered Fact Checker")
        gr.Markdown("Enter a claim to check its veracity against multiple sources.")

        with gr.Row():
            with gr.Column():
                claim = gr.Textbox(
                    label="Claim to Check",
                    placeholder="Enter the claim you want to verify...",
                    lines=3
                )
                num_sources = gr.Slider(
                    label="Number of Sources to Check",
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1
                )
                check_button = gr.Button("Check Claim", variant="primary")

            with gr.Column():
                status = gr.Markdown("Ready to check claims...")
                report = gr.Markdown()

        def check_fact_wrapper(claim: str, num_sources: int):
            """Yield (status, report) pairs: a progress message, then the outcome."""
            # Reject blank input up front instead of spending a search
            # request on it and ending in a generic error.
            if not claim or not claim.strip():
                yield "⚠️ Please enter a claim to check.", ""
                return

            status_value = "🔍 Searching and analyzing sources..."
            yield status_value, ""

            try:
                # The slider value is cast to int before use.
                result = fact_checker.check_fact(claim, int(num_sources))
                if result:
                    status_value = "✅ Analysis complete!"
                    report_value = format_fact_check_report(result)
                else:
                    status_value = "❌ Error occurred"
                    report_value = "Error occurred during fact checking."
            except Exception as e:
                status_value = "❌ Error occurred"
                report_value = f"Error: {str(e)}"

            yield status_value, report_value

        check_button.click(
            fn=check_fact_wrapper,
            inputs=[claim, num_sources],
            outputs=[status, report],
            show_progress=True
        )

    demo.launch()

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()