Perfect7613 committed on
Commit
e268dcd
•
1 Parent(s): e635381
Files changed (1) hide show
  1. app.py +288 -0
app.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import google.generativeai as genai
3
+ from datetime import datetime
4
+ from dataclasses import dataclass
5
+ from typing import List, Dict, Optional, Tuple
6
+ import requests
7
+ import json
8
+ import os
9
+ from dotenv import load_dotenv
10
+
11
@dataclass
class Source:
    """A web page consulted while checking a claim.

    Attributes:
        url: Address the page was fetched from.
        title: Title reported for the page.
        content: Text extracted from the page.
        reputation_score: Credibility weight assigned to the page; it is
            multiplied into the weight of every piece of evidence taken
            from this source.
    """

    url: str
    title: str
    content: str
    reputation_score: float
18
+
19
@dataclass
class FactCheckResult:
    """Outcome of checking a single claim.

    Attributes:
        claim: The statement that was checked.
        verdict: Human-readable verdict label.
        confidence_score: Confidence in the verdict.
        analysis_date: Timestamp string recording when the check ran.
        sources: The pages that were analyzed.
        evidence: Evidence items that support the claim.
        contradictions: Evidence items that contradict the claim.
        explanation: Short text explaining how the verdict was reached.
    """

    claim: str
    verdict: str
    confidence_score: float
    analysis_date: str
    sources: List[Source]
    evidence: List[Dict]
    contradictions: List[Dict]
    explanation: str
30
+
31
class GeminiFactChecker:
    """Fact-check a claim using web search, a page reader and Gemini.

    The pipeline is: search the web for the claim, fetch the text of each
    hit through the Jina reader endpoint, ask the Gemini model to extract
    supporting / contradicting evidence from each page, then weigh that
    evidence into a verdict.

    Configuration is read from environment variables: ``GOOGLE_API_KEY``
    (required), ``SEARCH_API_KEY``, ``SEARCH_ENGINE_ID`` and
    ``JINA_AI_API_KEY``.
    """

    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled remote server cannot hang the whole check.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Configure the Gemini client and read the search/reader settings.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is not set.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable is required")

        genai.configure(api_key=api_key)
        generation_config = genai.types.GenerationConfig(
            temperature=0.1,
            top_p=0.8,
            top_k=40,
        )

        self.model = genai.GenerativeModel(
            model_name='gemini-1.5-pro',
            generation_config=generation_config,
        )
        self.search_api_key = os.getenv("SEARCH_API_KEY")
        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
        self.jinai_api_key = os.getenv("JINA_AI_API_KEY")
        self.jinai_reader_url = "https://r.jina.ai/"

    def _search_sources(self, claim: str, num_sources: int = 3) -> List[str]:
        """Return result URLs for ``claim`` from the Custom Search API.

        Args:
            claim: Text used as the search query.
            num_sources: Desired number of results; clamped to 1..10, the
                range the API accepts for a single request.

        Returns:
            A list of result links, or an empty list if the search fails.
        """
        try:
            search_url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': self.search_api_key,
                'cx': self.search_engine_id,
                'q': claim,
                'num': max(1, min(int(num_sources), 10)),
            }
            response = requests.get(
                search_url, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            search_results = response.json()
            return [
                item['link']
                for item in search_results.get('items', [])
                if item.get('link')
            ]
        except Exception as e:
            # Best effort: a failed search is reported as "no sources".
            print(f"Error searching sources: {str(e)}")
            return []

    def _fetch_webpage_content(self, url: str) -> Optional[dict]:
        """Fetch the readable text of ``url`` through the reader endpoint.

        Returns:
            A dict with ``content`` (truncated to 5000 characters),
            ``title`` and the raw ``data`` payload, or ``None`` when the
            request fails or the response carries no data.
        """
        try:
            headers = {'Accept': 'application/json'}
            # Only authenticate when a key is configured; otherwise the
            # header would carry the literal string "Bearer None".
            if self.jinai_api_key:
                headers['Authorization'] = f'Bearer {self.jinai_api_key}'

            # The base URL already ends with "/", so normalise it to avoid
            # producing "https://r.jina.ai//<url>".
            reader_url = f"{self.jinai_reader_url.rstrip('/')}/{url}"
            response = requests.get(
                reader_url, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            payload = response.json().get('data')
            if not payload:
                return None

            return {
                # `or ''` also covers an explicit null in the response.
                "content": (payload.get('content') or '')[:5000],
                "title": payload.get('title') or '',
                "data": payload,
            }
        except Exception as e:
            print(f"Error fetching {url}: {str(e)}")
            return None

    @staticmethod
    def _extract_json_array(text: str) -> List[Dict]:
        """Parse the JSON array embedded in a model response.

        Handles a bare array as well as one wrapped in a Markdown code
        fence (with or without a ``json`` tag) or surrounded by prose, by
        parsing from the first ``[`` to the last ``]``.

        Returns:
            The array's dict elements; non-dict elements are dropped.

        Raises:
            ValueError: If no array is present or it is not valid JSON.
        """
        start = text.find('[')
        end = text.rfind(']')
        if start == -1 or end < start:
            raise ValueError("no JSON array found in model response")
        parsed = json.loads(text[start:end + 1])
        return [item for item in parsed if isinstance(item, dict)]

    def _analyze_evidence(self, claim: str, sources: "List[Source]") -> List[Dict]:
        """Ask the model for evidence about ``claim`` in each source.

        Every returned evidence dict is tagged with ``source_score`` (the
        source's reputation score) and, if the model omitted it, with a
        ``source`` title. A source whose analysis fails is skipped.
        """
        all_evidence = []

        for source in sources:
            prompt = (
                "Analyze this content and return evidence as JSON array:\n"
                "\n"
                f'CLAIM: "{claim}"\n'
                f"SOURCE TITLE: {source.title}\n"
                f"CONTENT: {source.content[:2000]}\n"
                "\n"
                "Return array of evidence objects with properties:\n"
                "- text: exact quote or clear paraphrase\n"
                '- type: "supporting" or "contradicting"\n'
                "- relevance: number 0.0 to 1.0\n"
                "- source: source title\n"
            )

            try:
                response = self.model.generate_content(prompt)
                raw_text = (response.text or "").strip()
                if not raw_text:
                    continue

                evidence_list = self._extract_json_array(raw_text)
                for evidence in evidence_list:
                    evidence["source_score"] = source.reputation_score
                    evidence.setdefault("source", source.title)
                all_evidence.extend(evidence_list)

            except Exception as e:
                # One bad source must not abort the remaining ones.
                print(f"Error analyzing source {source.url}: {str(e)}")
                continue

        return all_evidence

    @staticmethod
    def _as_float(value, default: float) -> float:
        """Convert ``value`` to float, falling back to ``default``."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        # NaN compares unequal to itself; treat it like a missing value.
        return default if number != number else number

    @classmethod
    def _evidence_weight(cls, item: Dict) -> float:
        """Return relevance (clamped to 0..1) times the source score."""
        relevance = cls._as_float(item.get("relevance", 0.5), 0.5)
        source_score = cls._as_float(item.get("source_score", 1), 1.0)
        relevance = min(max(relevance, 0.0), 1.0)
        return relevance * max(source_score, 0.0)

    @staticmethod
    def _evidence_of_type(evidence: List[Dict], wanted: str) -> List[Dict]:
        """Select evidence whose ``type`` matches ``wanted`` (case-insensitive)."""
        return [
            item for item in evidence
            if str(item.get("type", "")).strip().lower() == wanted
        ]

    @classmethod
    def _summarize_verdict(
        cls, supporting: List[Dict], contradicting: List[Dict]
    ) -> Tuple[str, float, str]:
        """Weigh the evidence into ``(verdict, confidence, explanation)``.

        When there is no evidence, or none of it carries any weight, the
        verdict is "Insufficient evidence" with confidence 0.0 rather than
        a maximally confident "Likely False".
        """
        if not supporting and not contradicting:
            return (
                "Insufficient evidence",
                0.0,
                "No evidence found from analyzed sources.",
            )

        total_support = sum(cls._evidence_weight(e) for e in supporting)
        total_contradiction = sum(cls._evidence_weight(e) for e in contradicting)
        total = total_support + total_contradiction
        if total <= 0:
            return (
                "Insufficient evidence",
                0.0,
                "Evidence was found but none of it carried any weight.",
            )

        support_ratio = total_support / total
        confidence = max(support_ratio, 1 - support_ratio)

        if support_ratio > 0.6:
            verdict = "Likely True" if confidence >= 0.7 else "Somewhat True"
        elif support_ratio < 0.4:
            verdict = "Likely False" if confidence >= 0.7 else "Somewhat False"
        else:
            verdict = "Inconclusive"

        explanation = (
            f"Based on {len(supporting)} supporting and "
            f"{len(contradicting)} contradicting pieces of evidence."
        )
        return verdict, confidence, explanation

    def check_fact(self, claim: str, num_sources: int = 3) -> "Optional[FactCheckResult]":
        """Run the full fact-checking pipeline for ``claim``.

        Args:
            claim: The statement to verify.
            num_sources: How many search results to analyze.

        Returns:
            A ``FactCheckResult``, or ``None`` when no sources could be
            found or fetched, or when an unexpected error occurs.
        """
        try:
            urls = self._search_sources(claim, num_sources)
            if not urls:
                return None

            sources = []
            for url in urls:
                content_dict = self._fetch_webpage_content(url)
                if content_dict:
                    sources.append(Source(
                        url=url,
                        # Fall back to the URL when the title is empty too.
                        title=content_dict.get("title") or url,
                        content=content_dict["content"],
                        reputation_score=0.8,  # Default score
                    ))

            if not sources:
                return None

            evidence = self._analyze_evidence(claim, sources)
            supporting = self._evidence_of_type(evidence, "supporting")
            contradicting = self._evidence_of_type(evidence, "contradicting")
            verdict, confidence, explanation = self._summarize_verdict(
                supporting, contradicting
            )

            return FactCheckResult(
                claim=claim,
                verdict=verdict,
                confidence_score=confidence,
                analysis_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                sources=sources,
                evidence=supporting,
                contradictions=contradicting,
                explanation=explanation,
            )

        except Exception as e:
            print(f"Error during fact checking: {str(e)}")
            return None
197
+
198
def format_fact_check_report(result: "FactCheckResult") -> str:
    """Render a fact-check result as a Markdown report.

    The report lists the claim, verdict, confidence, explanation, summary
    counts and every analyzed source, followed by at most three supporting
    and three contradicting evidence items.

    Args:
        result: The fact-check outcome to render.

    Returns:
        The Markdown text of the report, ending with a newline.
    """
    lines = [
        "# Fact Check Report",
        "",
        "## Claim",
        f'"{result.claim}"',
        "",
        f"## Verdict: {result.verdict}",
        f"Confidence Score: {result.confidence_score:.2f}",
        "",
        "## Explanation",
        f"{result.explanation}",
        "",
        "## Analysis Summary",
        f"- Number of sources analyzed: {len(result.sources)}",
        f"- Supporting evidence found: {len(result.evidence)}",
        f"- Contradicting points found: {len(result.contradictions)}",
        "",
        "## Sources Analyzed",
    ]
    lines.extend(
        f"- [{source.title}]({source.url}) (Credibility: {source.reputation_score:.2f})"
        for source in result.sources
    )

    def evidence_lines(heading, items):
        # Evidence dicts come from model output, so keys may be missing;
        # use placeholders instead of raising KeyError mid-report.
        rendered = ["", heading]
        for item in items[:3]:
            text = item.get('text', '')
            origin = item.get('source', 'Unknown source')
            rendered.append(f"- {text} (Source: {origin})")
        return rendered

    if result.evidence:
        lines.extend(evidence_lines("### Supporting Evidence:", result.evidence))

    if result.contradictions:
        lines.extend(evidence_lines("### Contradicting Points:", result.contradictions))

    return "\n".join(lines) + "\n"
231
+
232
def main():
    """Build the Gradio interface for the fact checker and launch it.

    Loads environment variables from a ``.env`` file, creates the
    ``GeminiFactChecker`` and wires a claim textbox, a source-count slider
    and a button to a generator callback that streams a status line
    followed by the final report.
    """
    load_dotenv()
    fact_checker = GeminiFactChecker()

    with gr.Blocks() as demo:
        gr.Markdown("# AI-Powered Fact Checker")
        gr.Markdown("Enter a claim to check its veracity against multiple sources.")

        with gr.Row():
            with gr.Column():
                claim = gr.Textbox(
                    label="Claim to Check",
                    placeholder="Enter the claim you want to verify...",
                    lines=3
                )
                num_sources = gr.Slider(
                    label="Number of Sources to Check",
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1
                )
                check_button = gr.Button("Check Claim", variant="primary")

            with gr.Column():
                status = gr.Markdown("Ready to check claims...")
                report = gr.Markdown()

        def check_fact_wrapper(claim: str, num_sources: int):
            """Yield ``(status, report)`` pairs while a claim is checked."""
            # Reject blank input up front instead of sending an empty
            # query to the search API and reporting a generic error.
            if not claim or not claim.strip():
                yield "❌ Error occurred", "Please enter a claim to check."
                return

            # First update: show progress immediately, with an empty report.
            yield "🔍 Searching and analyzing sources...", ""

            try:
                result = fact_checker.check_fact(claim.strip(), int(num_sources))
                if result:
                    status_value = "✅ Analysis complete!"
                    report_value = format_fact_check_report(result)
                else:
                    status_value = "❌ Error occurred"
                    report_value = "Error occurred during fact checking."
            except Exception as e:
                status_value = "❌ Error occurred"
                report_value = f"Error: {str(e)}"

            yield status_value, report_value

        check_button.click(
            fn=check_fact_wrapper,
            inputs=[claim, num_sources],
            outputs=[status, report],
            show_progress=True
        )

    demo.launch()
286
+
287
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()