# Spaces: Sleeping | File size: 10,100 Bytes | e268dcd
import gradio as gr
import google.generativeai as genai
from datetime import datetime
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
import requests
import json
import os
from dotenv import load_dotenv
@dataclass
class Source:
    """One web page consulted as evidence while checking a claim."""

    # Address the page was fetched from.
    url: str
    # Human-readable page title.
    title: str
    # Extracted page text handed to the model for analysis.
    content: str
    # Credibility weight applied to evidence taken from this page.
    reputation_score: float
@dataclass
class FactCheckResult:
    """Outcome of checking a single claim, including the evidence behind it."""

    # The statement that was checked, as supplied by the caller.
    claim: str
    # Short textual verdict label.
    verdict: str
    # Strength of the verdict as a number.
    confidence_score: float
    # Timestamp string recording when the analysis ran.
    analysis_date: str
    # Pages that were fetched and analysed.
    sources: List[Source]
    # Evidence items classified as supporting the claim.
    evidence: List[Dict]
    # Evidence items classified as contradicting the claim.
    contradictions: List[Dict]
    # One-line human-readable summary of how the verdict was reached.
    explanation: str
class GeminiFactChecker:
    """Check a claim by searching the web and asking Gemini to extract evidence.

    The pipeline is: search for pages about the claim, fetch each page's text
    through the Jina reader endpoint, ask the model for supporting and
    contradicting evidence per page, then weigh that evidence into a verdict.
    """

    # Seconds to wait on any outbound HTTP request before giving up, so a
    # stalled remote server cannot hang the whole fact check.
    _HTTP_TIMEOUT = 10

    def __init__(self):
        """Configure the Gemini client and read search/reader credentials.

        Raises:
            ValueError: If the GOOGLE_API_KEY environment variable is unset
                or empty.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable is required")
        genai.configure(api_key=api_key)
        generation_config = genai.types.GenerationConfig(
            temperature=0.1,
            top_p=0.8,
            top_k=40,
        )
        self.model = genai.GenerativeModel(
            model_name='gemini-1.5-pro',
            generation_config=generation_config,
        )
        self.search_api_key = os.getenv("SEARCH_API_KEY")
        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
        self.jinai_reader_url = "https://r.jina.ai/"

    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
        """Return result URLs for ``claim`` from the Google Custom Search API.

        Args:
            claim: Text used verbatim as the search query.
            num_sources: Number of results requested from the API.

        Returns:
            The ``link`` of every returned item, or an empty list when the
            request fails or yields no items (the error is printed).
        """
        try:
            search_url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.search_api_key,
                'cx': self.search_engine_id,
                'q': claim,
                'num': num_sources,
            }
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                search_url, params=params, timeout=self._HTTP_TIMEOUT
            )
            response.raise_for_status()
            search_results = response.json()
            return [item['link'] for item in search_results.get('items', [])]
        except Exception as e:
            # Best-effort boundary: a failed search means "no sources".
            print(f"Error searching sources: {str(e)}")
            return []

    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
        """Fetch the text of ``url`` through the reader endpoint.

        Assumes the reader replies with JSON holding a top-level ``data``
        object that carries ``content`` and ``title`` -- TODO confirm against
        the reader API documentation.

        Returns:
            A dict with ``content`` (first 5000 characters), ``title`` and the
            raw ``data`` object, or ``None`` when the request fails or the
            response has no ``data``.
        """
        try:
            headers = {'Accept': 'application/json'}
            # Only authenticate when a key is configured; the literal header
            # value "Bearer None" is never a valid credential.
            if self.jinai_api_key:
                headers['Authorization'] = f'Bearer {self.jinai_api_key}'
            # The base URL already ends in "/", so strip it before joining to
            # avoid a doubled slash in the request path.
            reader_base = self.jinai_reader_url.rstrip('/')
            response = requests.get(
                f"{reader_base}/{url}",
                headers=headers,
                timeout=self._HTTP_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            payload = data.get('data')
            if not payload:
                return None
            return {
                # "or ''" guards against an explicit null content/title.
                "content": (payload.get('content') or '')[:5000],
                "title": payload.get('title') or '',
                "data": payload,
            }
        except Exception as e:
            print(f"Error fetching {url}: {str(e)}")
            return None

    @staticmethod
    def _build_evidence_prompt(claim: str, source: "Source") -> str:
        """Build the evidence-extraction prompt for one source."""
        lines = (
            "Analyze this content and return evidence as JSON array:",
            f'CLAIM: "{claim}"',
            f"SOURCE TITLE: {source.title}",
            f"CONTENT: {source.content[:2000]}",
            "Return array of evidence objects with properties:",
            "- text: exact quote or clear paraphrase",
            '- type: "supporting" or "contradicting"',
            "- relevance: number 0.0 to 1.0",
            "- source: source title",
        )
        return "\n" + "\n".join(lines) + "\n"

    @staticmethod
    def _parse_evidence_json(raw_text: str) -> List[Dict]:
        """Parse a model reply into a list of evidence dicts.

        Accepts a bare JSON array or one wrapped in a Markdown code fence,
        with or without the ``json`` language tag.

        Raises:
            ValueError: If the text is not valid JSON or is not a JSON array.
        """
        text = raw_text.strip()
        if text.startswith("```"):
            text = text[3:]
            # A JSON array starts with "[", so a leading "json" can only be
            # the fence's language tag.
            if text.lower().startswith("json"):
                text = text[4:]
            if text.endswith("```"):
                text = text[:-3]
        parsed = json.loads(text)
        if not isinstance(parsed, list):
            raise ValueError("expected a JSON array of evidence objects")
        # Drop stray scalars so callers can rely on dict access.
        return [item for item in parsed if isinstance(item, dict)]

    def _analyze_evidence(self, claim: str, sources: List["Source"]) -> List[Dict]:
        """Ask the model for evidence about ``claim`` in each source.

        Every returned evidence dict gets a ``source_score`` key holding the
        originating source's ``reputation_score``. A source whose analysis
        fails is reported via ``print`` and skipped.
        """
        all_evidence = []
        for source in sources:
            prompt = self._build_evidence_prompt(claim, source)
            try:
                response = self.model.generate_content(prompt)
                reply = response.text
                if not reply:
                    continue
                evidence_list = self._parse_evidence_json(reply)
            except Exception as e:
                print(f"Error analyzing source {source.url}: {str(e)}")
                continue
            for evidence in evidence_list:
                evidence["source_score"] = source.reputation_score
            all_evidence.extend(evidence_list)
        return all_evidence

    @staticmethod
    def _evidence_weight(item: Dict) -> float:
        """Return relevance times source score for one evidence item.

        Missing or non-numeric values fall back to the defaults (relevance
        0.5, source score 1) instead of raising; relevance is clamped to
        the 0.0-1.0 range the prompt asks for.
        """
        try:
            relevance = float(item.get("relevance", 0.5))
        except (TypeError, ValueError):
            relevance = 0.5
        try:
            source_score = float(item.get("source_score", 1))
        except (TypeError, ValueError):
            source_score = 1.0
        relevance = max(0.0, min(1.0, relevance))
        return relevance * source_score

    @classmethod
    def _compute_verdict(
        cls, supporting: List[Dict], contradicting: List[Dict]
    ) -> Tuple[str, float]:
        """Turn weighted evidence into a ``(verdict, confidence)`` pair.

        With no weighted evidence at all the verdict is
        ``"Insufficient evidence"`` at confidence 0.0, rather than a
        full-confidence "false" produced by a zero support ratio.
        """
        total_support = sum(cls._evidence_weight(e) for e in supporting)
        total_contradiction = sum(cls._evidence_weight(e) for e in contradicting)
        total = total_support + total_contradiction
        if total <= 0:
            return "Insufficient evidence", 0.0
        support_ratio = total_support / total
        confidence = max(support_ratio, 1 - support_ratio)
        if support_ratio > 0.6:
            verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
        elif support_ratio < 0.4:
            verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
        else:
            verdict = "Inconclusive"
        return verdict, confidence

    def check_fact(self, claim: str, num_sources: int = 3) -> Optional["FactCheckResult"]:
        """Run the full search, fetch, analyse and verdict pipeline.

        Args:
            claim: The statement to verify.
            num_sources: How many search results to request.

        Returns:
            A ``FactCheckResult``, or ``None`` when no source could be found
            or fetched, or when an unexpected error occurs (it is printed).
        """
        try:
            urls = self._search_sources(claim, num_sources)
            if not urls:
                return None
            sources = []
            for url in urls:
                page = self._fetch_webpage_content(url)
                if not page:
                    continue
                sources.append(Source(
                    url=url,
                    # The title key is always present but may be empty, so
                    # fall back on falsiness rather than on a missing key.
                    title=page.get("title") or url,
                    content=page["content"],
                    reputation_score=0.8,  # Default score
                ))
            if not sources:
                return None
            evidence = self._analyze_evidence(claim, sources)
            # .get() keeps one malformed item from aborting the whole check.
            supporting = [e for e in evidence if e.get("type") == "supporting"]
            contradicting = [e for e in evidence if e.get("type") == "contradicting"]
            verdict, confidence = self._compute_verdict(supporting, contradicting)
            if not evidence:
                explanation = "No evidence found from analyzed sources."
            else:
                explanation = f"Based on {len(supporting)} supporting and {len(contradicting)} contradicting pieces of evidence."
            return FactCheckResult(
                claim=claim,
                verdict=verdict,
                confidence_score=confidence,
                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                sources=sources,
                evidence=supporting,
                contradictions=contradicting,
                explanation=explanation,
            )
        except Exception as e:
            print(f"Error during fact checking: {str(e)}")
            return None
def format_fact_check_report(result: "FactCheckResult") -> str:
    """Render a fact-check result as a Markdown report.

    The report has a fixed header (claim, verdict, confidence, explanation,
    counts), one bullet per analysed source, and up to three bullets each
    for supporting and contradicting evidence.

    Args:
        result: Object exposing ``claim``, ``verdict``, ``confidence_score``,
            ``explanation``, ``sources``, ``evidence`` and ``contradictions``.

    Returns:
        The Markdown text of the report.
    """

    def _bullet(item: dict) -> str:
        # Evidence dicts come from model output and may lack keys; fall back
        # to placeholders instead of raising KeyError mid-report.
        text = item.get('text', '(no text provided)')
        origin = item.get('source', 'Unknown source')
        return f"- {text} (Source: {origin})\n"

    header_lines = (
        "# Fact Check Report",
        "## Claim",
        f'"{result.claim}"',
        f"## Verdict: {result.verdict}",
        f"Confidence Score: {result.confidence_score:.2f}",
        "## Explanation",
        f"{result.explanation}",
        "## Analysis Summary",
        f"- Number of sources analyzed: {len(result.sources)}",
        f"- Supporting evidence found: {len(result.evidence)}",
        f"- Contradicting points found: {len(result.contradictions)}",
        "## Sources Analyzed",
    )
    # Collect fragments and join once rather than growing a string with +=.
    parts = ["\n".join(header_lines) + "\n"]
    parts.extend(
        f"- [{source.title}]({source.url}) (Credibility: {source.reputation_score:.2f})\n"
        for source in result.sources
    )
    if result.evidence:
        parts.append("\n### Supporting Evidence:\n")
        parts.extend(_bullet(e) for e in result.evidence[:3])
    if result.contradictions:
        parts.append("\n### Contradicting Points:\n")
        parts.extend(_bullet(c) for c in result.contradictions[:3])
    return "".join(parts)
def main():
    """Load environment variables, build the Gradio UI and launch the app.

    The UI has a claim textbox, a slider for the number of sources and a
    button; clicking the button streams a status line followed by the
    finished Markdown report.
    """
    load_dotenv()
    fact_checker = GeminiFactChecker()

    with gr.Blocks() as demo:
        gr.Markdown("# AI-Powered Fact Checker")
        gr.Markdown("Enter a claim to check its veracity against multiple sources.")
        with gr.Row():
            with gr.Column():
                claim = gr.Textbox(
                    label="Claim to Check",
                    placeholder="Enter the claim you want to verify...",
                    lines=3,
                )
                num_sources = gr.Slider(
                    label="Number of Sources to Check",
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1,
                )
                check_button = gr.Button("Check Claim", variant="primary")
            with gr.Column():
                status = gr.Markdown("Ready to check claims...")
                report = gr.Markdown()

        def check_fact_wrapper(claim: str, num_sources: int):
            """Yield an interim status, then the final status and report."""
            # The status icons are written as escape sequences so they survive
            # re-encoding of this file. NOTE(review): the icons were
            # reconstructed from mis-decoded text (magnifying glass, check
            # mark, cross mark) -- confirm against the original.
            status_value = "\U0001F50D Searching and analyzing sources..."
            yield status_value, ""
            try:
                # The slider may deliver a float, hence the int() cast.
                result = fact_checker.check_fact(claim, int(num_sources))
                if result:
                    status_value = "\u2705 Analysis complete!"
                    report_value = format_fact_check_report(result)
                else:
                    status_value = "\u274C Error occurred"
                    report_value = "Error occurred during fact checking."
            except Exception as e:
                status_value = "\u274C Error occurred"
                report_value = f"Error: {str(e)}"
            yield status_value, report_value

        check_button.click(
            fn=check_fact_wrapper,
            inputs=[claim, num_sources],
            outputs=[status, report],
            show_progress=True,
        )

    demo.launch()
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()