Spaces:

Perfect7613
/

Factchecker

Sleeping

App Files Files Community

Perfect7613 committed 30 days ago

Commit

e268dcd

•

1 Parent(s): e635381

Done

Browse files

Files changed (1) hide show

app.py +288 -0

app.py ADDED Viewed

	@@ -0,0 +1,288 @@

+import gradio as gr
+import google.generativeai as genai
+from datetime import datetime
+from dataclasses import dataclass
+from typing import List, Dict, Optional, Tuple
+import requests
+import json
+import os
+from dotenv import load_dotenv
+@dataclass
+class Source:
+    """Represents a source used for fact-checking."""
+    # NOTE: field order defines the positional arguments of the generated
+    # constructor, so keep it stable.
+    # Address of the web page the content was fetched from.
+    url: str
+    # Page title; used as the link text in the generated report.
+    title: str
+    # Text extracted from the page; an excerpt is embedded in the model prompt.
+    content: str
+    # Credibility weight; copied onto each piece of evidence as "source_score"
+    # and multiplied with the evidence relevance when the verdict is computed.
+    reputation_score: float
+@dataclass
+class FactCheckResult:
+    """Represents the result of a fact check."""
+    # NOTE: field order defines the positional arguments of the generated
+    # constructor, so keep it stable.
+    # The statement that was checked.
+    claim: str
+    # Verdict label, e.g. "Likely True", "Somewhat False", "Inconclusive" or
+    # "Insufficient evidence".
+    verdict: str
+    # Strength of the verdict; 0.0 when no evidence was found. Rendered with
+    # two decimals in the report.
+    confidence_score: float
+    # Timestamp of the analysis formatted as "%Y-%m-%d %H:%M:%S".
+    analysis_date: str
+    # Sources whose content was fetched and analyzed.
+    sources: List[Source]
+    # Evidence dicts classified as "supporting" (model output plus "source_score").
+    evidence: List[Dict]
+    # Evidence dicts classified as "contradicting".
+    contradictions: List[Dict]
+    # Short human-readable summary of how the verdict was reached.
+    explanation: str
+class GeminiFactChecker:
+    def __init__(self):
+        if not os.getenv("GOOGLE_API_KEY"):
+            raise ValueError("GOOGLE_API_KEY environment variable is required")
+        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+        generation_config = genai.types.GenerationConfig(
+            temperature=0.1,
+            top_p=0.8,
+            top_k=40,
+        )
+        self.model = genai.GenerativeModel(
+            model_name='gemini-1.5-pro',
+            generation_config=generation_config
+        )
+        self.search_api_key = os.getenv("SEARCH_API_KEY")
+        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
+        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
+        self.jinai_reader_url = "https://r.jina.ai/"
+    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
+        try:
+            search_url = "https://www.googleapis.com/customsearch/v1"
+            params = {
+                'key': self.search_api_key,
+                'cx': self.search_engine_id,
+                'q': claim,
+                'num': num_sources
+            }
+            response = requests.get(search_url, params=params)
+            response.raise_for_status()
+            search_results = response.json()
+            return [item['link'] for item in search_results.get('items', [])]
+        except Exception as e:
+            print(f"Error searching sources: {str(e)}")
+            return []
+    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
+        try:
+            headers = {
+                'Accept': 'application/json',
+                'Authorization': f'Bearer {self.jinai_api_key}'
+            }
+            response = requests.get(f"{self.jinai_reader_url}/{url}",
+                                 headers=headers,
+                                 timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            if not data.get('data'):
+                return None
+            return {
+                "content": data['data'].get('content', '')[:5000],
+                "title": data['data'].get('title', ''),
+                "data": data['data']
+            }
+        except Exception as e:
+            print(f"Error fetching {url}: {str(e)}")
+            return None
+    def _analyze_evidence(self, claim: str, sources: List[Source]) -> List[Dict]:
+        all_evidence = []
+        for source in sources:
+            prompt = f"""
+            Analyze this content and return evidence as JSON array:
+            CLAIM: "{claim}"
+            SOURCE TITLE: {source.title}
+            CONTENT: {source.content[:2000]}
+            Return array of evidence objects with properties:
+            - text: exact quote or clear paraphrase
+            - type: "supporting" or "contradicting"
+            - relevance: number 0.0 to 1.0
+            - source: source title
+            """
+            try:
+                response = self.model.generate_content(prompt)
+                if response.text:
+                    clean_text = response.text.strip()
+                    if clean_text.startswith('```json'):
+                        clean_text = clean_text[7:-3]
+                    elif clean_text.startswith('[') and clean_text.endswith(']'):
+                        clean_text = clean_text
+                    evidence_list = json.loads(clean_text)
+                    for evidence in evidence_list:
+                        evidence["source_score"] = source.reputation_score
+                    all_evidence.extend(evidence_list)
+            except Exception as e:
+                print(f"Error analyzing source {source.url}: {str(e)}")
+                continue
+        return all_evidence
+    def check_fact(self, claim: str, num_sources: int = 3) -> Optional[FactCheckResult]:
+        try:
+            urls = self._search_sources(claim, num_sources)
+            if not urls:
+                return None
+            sources = []
+            for url in urls:
+                content_dict = self._fetch_webpage_content(url)
+                if content_dict:
+                    sources.append(Source(
+                        url=url,
+                        title=content_dict.get("title", url),
+                        content=content_dict["content"],
+                        reputation_score=0.8  # Default score
+                    ))
+            if not sources:
+                return None
+            evidence = self._analyze_evidence(claim, sources)
+            supporting = [e for e in evidence if e["type"] == "supporting"]
+            contradicting = [e for e in evidence if e["type"] == "contradicting"]
+            total_support = sum(
+                float(e.get("relevance", 0.5)) * float(e.get("source_score", 1))
+                for e in supporting
+            )
+            total_contradiction = sum(
+                float(e.get("relevance", 0.5)) * float(e.get("source_score", 1))
+                for e in contradicting
+            )
+            if not evidence:
+                verdict = "Insufficient evidence"
+                confidence = 0.0
+                explanation = "No evidence found from analyzed sources."
+            else:
+                support_ratio = total_support / (total_support + total_contradiction) if (total_support + total_contradiction) > 0 else 0
+                confidence = max(support_ratio, 1 - support_ratio)
+                if support_ratio > 0.6:
+                    verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
+                elif support_ratio < 0.4:
+                    verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
+                else:
+                    verdict = "Inconclusive"
+                explanation = f"Based on {len(supporting)} supporting and {len(contradicting)} contradicting pieces of evidence."
+            return FactCheckResult(
+                claim=claim,
+                verdict=verdict,
+                confidence_score=confidence,
+                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                sources=sources,
+                evidence=supporting,
+                contradictions=contradicting,
+                explanation=explanation
+            )
+        except Exception as e:
+            print(f"Error during fact checking: {str(e)}")
+            return None
+def format_fact_check_report(result: FactCheckResult) -> str:
+    report = f"""# Fact Check Report
+## Claim
+"{result.claim}"
+## Verdict: {result.verdict}
+Confidence Score: {result.confidence_score:.2f}
+## Explanation
+{result.explanation}
+## Analysis Summary
+- Number of sources analyzed: {len(result.sources)}
+- Supporting evidence found: {len(result.evidence)}
+- Contradicting points found: {len(result.contradictions)}
+## Sources Analyzed
+"""
+    for source in result.sources:
+        report += f"- [{source.title}]({source.url}) (Credibility: {source.reputation_score:.2f})\n"
+    if result.evidence:
+        report += "\n### Supporting Evidence:\n"
+        for e in result.evidence[:3]:
+            report += f"- {e['text']} (Source: {e['source']})\n"
+    if result.contradictions:
+        report += "\n### Contradicting Points:\n"
+        for c in result.contradictions[:3]:
+            report += f"- {c['text']} (Source: {c['source']})\n"
+    return report
+def main():
+    load_dotenv()
+    fact_checker = GeminiFactChecker()
+    with gr.Blocks() as demo:
+        gr.Markdown("# AI-Powered Fact Checker")
+        gr.Markdown("Enter a claim to check its veracity against multiple sources.")
+        with gr.Row():
+            with gr.Column():
+                claim = gr.Textbox(
+                    label="Claim to Check",
+                    placeholder="Enter the claim you want to verify...",
+                    lines=3
+                )
+                num_sources = gr.Slider(
+                    label="Number of Sources to Check",
+                    minimum=1,
+                    maximum=5,
+                    value=3,
+                    step=1
+                )
+                check_button = gr.Button("Check Claim", variant="primary")
+            with gr.Column():
+                status = gr.Markdown("Ready to check claims...")
+                report = gr.Markdown()
+        def check_fact_wrapper(claim: str, num_sources: int):
+            status_value = "🔍 Searching and analyzing sources..."
+            yield status_value, ""
+            try:
+                result = fact_checker.check_fact(claim, int(num_sources))
+                if result:
+                    status_value = "✅ Analysis complete!"
+                    report_value = format_fact_check_report(result)
+                else:
+                    status_value = "❌ Error occurred"
+                    report_value = "Error occurred during fact checking."
+            except Exception as e:
+                status_value = "❌ Error occurred"
+                report_value = f"Error: {str(e)}"
+            yield status_value, report_value
+        check_button.click(
+            fn=check_fact_wrapper,
+            inputs=[claim, num_sources],
+            outputs=[status, report],
+            show_progress=True
+        )
+    demo.launch()
+# Start the UI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()