# Factchecker / app.py
# Perfect7613's picture
# Done
# e268dcd verified
import gradio as gr
import google.generativeai as genai
from datetime import datetime
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
import requests
import json
import os
from dotenv import load_dotenv
@dataclass
class Source:
    """Represents a source used for fact-checking.

    Attributes:
        url: Address of the page the content was fetched from.
        title: Page title as reported by the reader service.
        content: Extracted page text (the fetcher truncates it to 5000
            characters).
        reputation_score: Credibility weight applied to evidence taken from
            this source; shown as "Credibility" in the report.
    """

    url: str
    title: str
    content: str
    reputation_score: float
@dataclass
class FactCheckResult:
    """Represents the result of a fact check.

    Attributes:
        claim: The claim text that was checked.
        verdict: Human-readable outcome, e.g. "Likely True", "Somewhat True",
            "Likely False", "Somewhat False", "Inconclusive" or
            "Insufficient evidence".
        confidence_score: Confidence in the verdict, between 0.0 and 1.0.
        analysis_date: Timestamp of the analysis formatted as
            ``%Y-%m-%d %H:%M:%S``.
        sources: The sources whose content was analyzed.
        evidence: Evidence items classified as supporting the claim.
        contradictions: Evidence items classified as contradicting the claim.
        explanation: Short summary of how the verdict was reached.
    """

    claim: str
    verdict: str
    confidence_score: float
    analysis_date: str
    sources: List[Source]
    evidence: List[Dict]
    contradictions: List[Dict]
    explanation: str
class GeminiFactChecker:
    """Fact-check a claim using web search, page extraction and Gemini.

    ``check_fact`` drives the pipeline: query Google Custom Search for the
    claim, fetch each hit through the Jina reader endpoint, ask Gemini to
    extract supporting/contradicting evidence from each page, then weigh the
    evidence into a verdict.
    """

    # Google Custom Search only accepts ``num`` values from 1 to 10.
    _MAX_SEARCH_RESULTS = 10
    # Seconds to wait on any outbound HTTP request before giving up.
    _HTTP_TIMEOUT = 10

    def __init__(self):
        """Configure the Gemini model and read search/reader credentials.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is not set in the environment.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable is required")
        genai.configure(api_key=api_key)
        generation_config = genai.types.GenerationConfig(
            temperature=0.1,
            top_p=0.8,
            top_k=40,
        )
        self.model = genai.GenerativeModel(
            model_name='gemini-1.5-pro',
            generation_config=generation_config
        )
        self.search_api_key = os.getenv("SEARCH_API_KEY")
        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
        self.jinai_reader_url = "https://r.jina.ai/"

    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
        """Return result URLs for ``claim`` from Google Custom Search.

        Args:
            claim: Text used as the search query.
            num_sources: Desired number of results; clamped to 1..10.

        Returns:
            The result links, or an empty list if the search fails.
        """
        try:
            search_url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.search_api_key,
                'cx': self.search_engine_id,
                'q': claim,
                'num': max(1, min(int(num_sources), self._MAX_SEARCH_RESULTS)),
            }
            # A timeout keeps a stalled connection from hanging the UI.
            response = requests.get(search_url, params=params,
                                    timeout=self._HTTP_TIMEOUT)
            response.raise_for_status()
            search_results = response.json()
            return [
                item['link']
                for item in search_results.get('items', [])
                if item.get('link')
            ]
        except Exception as e:
            # Deliberate best-effort boundary: log and report "no sources".
            print(f"Error searching sources: {str(e)}")
            return []

    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
        """Fetch ``url`` through the Jina reader and return its text.

        Args:
            url: Page address to extract.

        Returns:
            A dict with ``content`` (at most 5000 characters), ``title`` and
            the raw ``data`` payload, or ``None`` if the request fails or the
            response carries no usable ``data`` object.
        """
        try:
            headers = {'Accept': 'application/json'}
            if self.jinai_api_key:
                # Avoid sending a literal "Bearer None" when no key is set.
                headers['Authorization'] = f'Bearer {self.jinai_api_key}'
            # The base URL already ends with "/", so strip it to avoid "//".
            reader_url = f"{self.jinai_reader_url.rstrip('/')}/{url}"
            response = requests.get(reader_url,
                                    headers=headers,
                                    timeout=self._HTTP_TIMEOUT)
            response.raise_for_status()
            payload = response.json().get('data')
            if not payload or not isinstance(payload, dict):
                return None
            return {
                # ``or ''`` guards against explicit null values in the JSON.
                "content": (payload.get('content') or '')[:5000],
                "title": payload.get('title') or '',
                "data": payload
            }
        except Exception as e:
            # Deliberate best-effort boundary: one bad page must not abort
            # the whole check.
            print(f"Error fetching {url}: {str(e)}")
            return None

    @staticmethod
    def _parse_evidence_json(text: str) -> List[Dict]:
        """Parse a model reply into a list of evidence dicts.

        Accepts a bare JSON array, an array wrapped in a Markdown code fence
        (with or without a ``json`` tag or a closing fence), an array
        surrounded by prose, a single object, or an object wrapping an array.
        Non-dict items are dropped.

        Raises:
            ValueError: If no JSON can be decoded from ``text``.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()
            # Only remove a closing fence that is actually present.
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            # Fall back to the outermost [...] span when prose surrounds it.
            start, end = cleaned.find("["), cleaned.rfind("]")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(cleaned[start:end + 1])
        if isinstance(parsed, dict):
            nested = next(
                (value for value in parsed.values() if isinstance(value, list)),
                None,
            )
            parsed = nested if nested is not None else [parsed]
        if not isinstance(parsed, list):
            return []
        return [item for item in parsed if isinstance(item, dict)]

    @staticmethod
    def _as_float(value, default: float) -> float:
        """Convert ``value`` to float, returning ``default`` if not numeric."""
        if value is None:
            return default
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        # NaN compares unequal to itself; treat it as missing.
        return default if number != number else number

    @staticmethod
    def _evidence_kind(item: Dict) -> str:
        """Return the normalized (lower-case, stripped) ``type`` of an item."""
        return str(item.get("type", "")).strip().lower()

    @classmethod
    def _total_weight(cls, items: List[Dict]) -> float:
        """Sum ``relevance * source_score`` over ``items``.

        Relevance defaults to 0.5 and is clamped to the 0.0-1.0 range the
        prompt asks for; source score defaults to 1.
        """
        return sum(
            min(max(cls._as_float(item.get("relevance"), 0.5), 0.0), 1.0)
            * cls._as_float(item.get("source_score"), 1.0)
            for item in items
        )

    @classmethod
    def _score_evidence(cls, supporting: List[Dict],
                        contradicting: List[Dict]) -> Tuple[str, float, str]:
        """Turn classified evidence into ``(verdict, confidence, explanation)``.

        With no classified evidence the verdict is "Insufficient evidence".
        If evidence exists but carries no positive weight the verdict is
        "Inconclusive" with confidence 0.0, rather than being treated as full
        contradiction.
        """
        if not supporting and not contradicting:
            return ("Insufficient evidence", 0.0,
                    "No evidence found from analyzed sources.")
        explanation = f"Based on {len(supporting)} supporting and {len(contradicting)} contradicting pieces of evidence."
        total_support = cls._total_weight(supporting)
        total_weight = total_support + cls._total_weight(contradicting)
        if total_weight <= 0:
            return "Inconclusive", 0.0, explanation
        support_ratio = total_support / total_weight
        confidence = max(support_ratio, 1 - support_ratio)
        if support_ratio > 0.6:
            verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
        elif support_ratio < 0.4:
            verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
        else:
            verdict = "Inconclusive"
        return verdict, confidence, explanation

    def _analyze_evidence(self, claim: str, sources: List["Source"]) -> List[Dict]:
        """Ask the model for evidence about ``claim`` in each source.

        Args:
            claim: The claim being checked.
            sources: Objects exposing ``url``, ``title``, ``content`` and
                ``reputation_score``.

        Returns:
            All evidence dicts found. Each gets ``source_score`` set to its
            source's reputation score, and ``source`` defaulted to the source
            title when the model omitted it. Sources whose analysis fails are
            logged and skipped.
        """
        all_evidence = []
        for source in sources:
            prompt = (
                "\n"
                "Analyze this content and return evidence as JSON array:\n"
                f'CLAIM: "{claim}"\n'
                f"SOURCE TITLE: {source.title}\n"
                f"CONTENT: {source.content[:2000]}\n"
                "Return array of evidence objects with properties:\n"
                "- text: exact quote or clear paraphrase\n"
                '- type: "supporting" or "contradicting"\n'
                "- relevance: number 0.0 to 1.0\n"
                "- source: source title\n"
            )
            try:
                response = self.model.generate_content(prompt)
                if not response.text:
                    continue
                evidence_list = self._parse_evidence_json(response.text)
            except Exception as e:
                # Model/SDK and parse failures are per-source and non-fatal.
                print(f"Error analyzing source {source.url}: {str(e)}")
                continue
            for evidence in evidence_list:
                evidence["source_score"] = source.reputation_score
                evidence.setdefault("source", source.title)
            all_evidence.extend(evidence_list)
        return all_evidence

    def check_fact(self, claim: str, num_sources: int = 3) -> Optional["FactCheckResult"]:
        """Check ``claim`` against web sources and return the result.

        Args:
            claim: The statement to verify.
            num_sources: How many search results to fetch and analyze.

        Returns:
            A ``FactCheckResult``, or ``None`` when the claim is blank, no
            search results or readable pages were found, or an unexpected
            error occurred (the error is printed).
        """
        try:
            if not claim or not claim.strip():
                return None
            urls = self._search_sources(claim, num_sources)
            if not urls:
                return None
            sources = []
            for url in urls:
                content_dict = self._fetch_webpage_content(url)
                if content_dict:
                    sources.append(Source(
                        url=url,
                        # The fetcher returns '' for a missing title, so use
                        # ``or`` to fall back to the URL.
                        title=content_dict.get("title") or url,
                        content=content_dict["content"],
                        reputation_score=0.8  # Default score
                    ))
            if not sources:
                return None
            evidence = self._analyze_evidence(claim, sources)
            supporting = [
                e for e in evidence if self._evidence_kind(e) == "supporting"
            ]
            contradicting = [
                e for e in evidence if self._evidence_kind(e) == "contradicting"
            ]
            verdict, confidence, explanation = self._score_evidence(
                supporting, contradicting
            )
            return FactCheckResult(
                claim=claim,
                verdict=verdict,
                confidence_score=confidence,
                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                sources=sources,
                evidence=supporting,
                contradictions=contradicting,
                explanation=explanation
            )
        except Exception as e:
            # Top-level boundary for the UI: log and signal failure with None.
            print(f"Error during fact checking: {str(e)}")
            return None
def format_fact_check_report(result: "FactCheckResult") -> str:
    """Render a fact-check result as a Markdown report.

    Args:
        result: Object exposing ``claim``, ``verdict``, ``confidence_score``,
            ``explanation``, ``sources``, ``evidence`` and ``contradictions``.

    Returns:
        Markdown text with the claim, verdict, summary counts, the list of
        sources, and up to three supporting and three contradicting evidence
        items. Evidence dicts missing ``text`` or ``source`` are rendered with
        placeholder values instead of raising ``KeyError``.
    """
    lines = [
        "# Fact Check Report",
        "## Claim",
        f'"{result.claim}"',
        f"## Verdict: {result.verdict}",
        f"Confidence Score: {result.confidence_score:.2f}",
        "## Explanation",
        f"{result.explanation}",
        "## Analysis Summary",
        f"- Number of sources analyzed: {len(result.sources)}",
        f"- Supporting evidence found: {len(result.evidence)}",
        f"- Contradicting points found: {len(result.contradictions)}",
        "## Sources Analyzed",
    ]
    lines.extend(
        f"- [{source.title}]({source.url}) (Credibility: {source.reputation_score:.2f})"
        for source in result.sources
    )
    sections = (
        ("### Supporting Evidence:", result.evidence),
        ("### Contradicting Points:", result.contradictions),
    )
    for heading, items in sections:
        if not items:
            continue
        # Empty entry yields the blank line that precedes each heading.
        lines.append("")
        lines.append(heading)
        # Evidence dicts come from model output, so keys may be missing.
        lines.extend(
            f"- {item.get('text', '(no text)')} (Source: {item.get('source', 'unknown')})"
            for item in items[:3]
        )
    return "\n".join(lines) + "\n"
def main():
    """Build the Gradio interface for the fact checker and launch it.

    Loads environment variables from a ``.env`` file, creates a
    ``GeminiFactChecker`` (which raises ``ValueError`` if ``GOOGLE_API_KEY``
    is missing), wires the "Check Claim" button to the checker and starts the
    app.
    """
    load_dotenv()
    fact_checker = GeminiFactChecker()
    with gr.Blocks() as demo:
        gr.Markdown("# AI-Powered Fact Checker")
        gr.Markdown("Enter a claim to check its veracity against multiple sources.")
        with gr.Row():
            with gr.Column():
                claim = gr.Textbox(
                    label="Claim to Check",
                    placeholder="Enter the claim you want to verify...",
                    lines=3
                )
                num_sources = gr.Slider(
                    label="Number of Sources to Check",
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1
                )
                check_button = gr.Button("Check Claim", variant="primary")
            with gr.Column():
                status = gr.Markdown("Ready to check claims...")
                report = gr.Markdown()

        def check_fact_wrapper(claim: str, num_sources: int):
            """Run a fact check, yielding ``(status, report)`` UI updates.

            Yields a progress status first, then the final status together
            with the formatted report or an error message.
            """
            if not claim or not claim.strip():
                # Nothing to search for; skip the network round-trips.
                yield "❌ Error occurred", "Please enter a claim to check."
                return
            status_value = "🔍 Searching and analyzing sources..."
            yield status_value, ""
            try:
                result = fact_checker.check_fact(claim, int(num_sources))
                if result:
                    status_value = "✅ Analysis complete!"
                    report_value = format_fact_check_report(result)
                else:
                    status_value = "❌ Error occurred"
                    report_value = "Error occurred during fact checking."
            except Exception as e:
                # UI boundary: surface the failure instead of crashing.
                status_value = "❌ Error occurred"
                report_value = f"Error: {str(e)}"
            yield status_value, report_value

        check_button.click(
            fn=check_fact_wrapper,
            inputs=[claim, num_sources],
            outputs=[status, report],
            show_progress=True
        )
    demo.launch()
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()