tsrivallabh committed · verified
Commit b2ed798 · 1 Parent(s): 24bbbf2

Synced repo using 'sync_with_huggingface' Github Action

__pycache__/fact_checker.cpython-310.pyc CHANGED
Binary files a/__pycache__/fact_checker.cpython-310.pyc and b/__pycache__/fact_checker.cpython-310.pyc differ
 
app.py CHANGED
@@ -154,34 +154,81 @@ button[kind="primary"] {
st.session_state.last_claim = claim
st.session_state.feedback_submitted = False  # Reset feedback state for new claim

- # Display results from session state
- if st.session_state.result:
-     result = st.session_state.result
-     if "error" in result:
-         st.error(f"Error: {result['error']}")
-         if "raw_response" in result:
-             with st.expander("Show raw LLM response"):
-                 st.code(result["raw_response"])
-     else:
-         # Display verdict
-         verdict_color = {
-             "True": "green",
-             "False": "red",
-             "Unverifiable": "orange"
-         }.get(result["verdict"], "gray")
-         st.markdown(f"**Verdict:** :{verdict_color}[{result['verdict']}]")
-
-         # Display confidence score
-         st.metric("Confidence Score", f"{result.get('confidence', 0):.2f}")
-
-         # Display evidence
-         with st.expander("View Supporting Evidence"):
-             for idx, evidence in enumerate(result.get("evidence", []), 1):
-                 st.markdown(f"{idx}. {evidence}")
-
-         # Display reasoning
-         st.markdown("**Analysis:**")
-         st.write(result.get("reasoning", "No reasoning provided"))
+ # Display results from session state
+ if st.session_state.result:
+     result = st.session_state.result
+
+     # Show entity verification results
+     st.subheader("Entity Verification Results")
+     entities = result.get("entities", [])
+     if entities:
+         for idx, entity_result in enumerate(entities, 1):
+             st.markdown(f"### Entity {idx}: {entity_result.get('entity', '')} ({entity_result.get('type', '')})")
+
+             if "error" in entity_result:
+                 st.error(f"Error: {entity_result['error']}")
+                 if "raw_response" in entity_result:
+                     with st.expander("Show raw LLM response"):
+                         st.code(entity_result["raw_response"])
+                 continue
+
+             verdict_color = {
+                 "Valid": "green",
+                 "Invalid": "red",
+                 "Unverified": "orange"
+             }.get(entity_result.get("verdict", ""), "gray")
+             st.markdown(f"**Verdict:** :{verdict_color}[{entity_result.get('verdict', 'Unknown')}]")
+
+             # Confidence
+             st.metric("Confidence Score", f"{entity_result.get('confidence', 0):.2f}")
+
+             # Evidence
+             with st.expander("View Supporting Evidence"):
+                 for i, evidence in enumerate(entity_result.get("evidence", []), 1):
+                     st.markdown(f"{i}. {evidence}")
+
+             # Reasoning
+             st.markdown("**Analysis:**")
+             st.write(entity_result.get("reasoning", "No reasoning provided"))
+     else:
+         st.write("No entities detected or verified.")
+
+     # Show claim verification results
+     st.subheader("Detected Claims and Verification Results")
+     claims = result.get("claims", [])
+     if not claims:
+         st.info("No check-worthy claims detected in the input.")
+     else:
+         for idx, claim_result in enumerate(claims, 1):
+             st.markdown(f"### Claim {idx}")
+             st.markdown(f"> {claim_result.get('claim', '')}")
+
+             if "error" in claim_result:
+                 st.error(f"Error: {claim_result['error']}")
+                 if "raw_response" in claim_result:
+                     with st.expander("Show raw LLM response"):
+                         st.code(claim_result["raw_response"])
+                 continue
+
+             verdict_color = {
+                 "True": "green",
+                 "False": "red",
+                 "Unverifiable": "orange"
+             }.get(claim_result.get("verdict", ""), "gray")
+             st.markdown(f"**Verdict:** :{verdict_color}[{claim_result.get('verdict', 'Unknown')}]")
+
+             # Confidence
+             st.metric("Confidence Score", f"{claim_result.get('confidence', 0):.2f}")
+
+             # Evidence
+             with st.expander("View Supporting Evidence"):
+                 for i, evidence in enumerate(claim_result.get("evidence", []), 1):
+                     st.markdown(f"{i}. {evidence}")
+
+             # Reasoning
+             st.markdown("**Analysis:**")
+             st.write(claim_result.get("reasoning", "No reasoning provided"))
+

# Feedback system
feedback_key = f"feedback_radio_{st.session_state.last_claim}"
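
For reference, a minimal sketch (not part of the commit) of the result dictionary that the updated rendering code above assumes FactChecker.verify_claim now returns. Field names come from the diff; the concrete values are purely illustrative:

# Hypothetical example of the dict the Streamlit code above iterates over.
example_result = {
    "entities": [
        {
            "entity": "NASA",            # from spaCy NER (extract_entities)
            "type": "ORG",
            "verdict": "Valid",          # mapped to green/red/orange by the UI
            "confidence": 0.87,
            "evidence": ["Illustrative evidence sentence about the entity."],
            "reasoning": "Illustrative reasoning text."
        }
    ],
    "claims": [
        {
            "claim": "Illustrative claim extracted from the input text.",
            "verdict": "True",
            "confidence": 0.91,
            "evidence": ["Illustrative supporting evidence."],
            "reasoning": "Illustrative reasoning text."
        }
    ],
}

Each entry carries verdict, confidence, evidence, and reasoning, which is exactly what the entity and claim loops above read via .get().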
assets/overall.png CHANGED

Git LFS Details (old file)

  • SHA256: 5594318bbead2ff46cbceda90076be809a7f5853752f6f2c3deb71b130872edd
  • Pointer size: 132 Bytes
  • Size of remote file: 2.05 MB

Git LFS Details (new file)

  • SHA256: a852edfba763002dd21c5c51a63833f3dfff526b66b4f3a23f0c0d8986f3315d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.18 MB
fact_checker.py CHANGED
@@ -6,6 +6,9 @@ import re
from openai import OpenAI
import re
import json
+ import spacy
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
+ from transformers import pipeline

def robust_json_extractor(response_content):
    # Preprocess: Remove markdown code blocks and extra whitespace
@@ -56,49 +59,55 @@ class FactChecker:
        )
        self.groq_client = groq_client
        self.model_name = "llama3-8b-8192"
+         self.ner = spacy.load("en_core_web_sm")
+
+
+         self.claim_tokenizer = T5Tokenizer.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
+         self.claim_model = T5ForConditionalGeneration.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
+
+     def extract_entities(self, text):
+         doc = self.ner(text)
+         return [(ent.text, ent.label_) for ent in doc.ents]
+
+     def extract_claims(self, text, threshold=0.5):
+         tok_input = self.claim_tokenizer.batch_encode_plus([text], return_tensors="pt", padding=True)
+         outputs = self.claim_model.generate(**tok_input)
+         claims = self.claim_tokenizer.batch_decode(outputs, skip_special_tokens=True)
+         claims = [claim.strip() for claim in claims if len(claim.strip()) > 0]
+         return claims
+

-     def verify_claim(self, claim, confidence_threshold=0.5):
-         # Vector search returns full verified statements with distances
+     def verify_single_claim(self, claim, confidence_threshold=0.5):
        results = self.collection.query(
            query_texts=[claim],
            n_results=3,
            include=["documents", "metadatas", "distances"]
        )
-
-         # Pair documents with their distances and sort by similarity (ascending distance)
        zipped_results = sorted(
            zip(results['documents'][0], results['metadatas'][0], results['distances'][0]),
-             key=lambda x: x[2]  # Sort by distance (ascending = most similar first)
+             key=lambda x: x[2]
        )
-
-         # Format evidence with similarity scores (full sentences, not fragments)
        evidence = []
        for doc, meta, distance in zipped_results:
            source = meta["source"] if meta and "source" in meta else "Unknown source"
-             # Convert distance to similarity score (higher = more similar)
            similarity_score = 1 - (distance / 2)  # Assuming cosine distance in [0,2]
            evidence.append(
                f'"{doc}" (Source: {source}, Similarity: {similarity_score:.2f})'
            )
-
-
-         # Calculate overall confidence
        avg_distance = sum(d for _, _, d in zipped_results) / len(zipped_results)
        confidence = 1 - (avg_distance / 2)  # Normalize to 0-1 range

-         # Threshold check
        if confidence < confidence_threshold:
            return {
                "verdict": "Unverifiable",
                "confidence": confidence,
-                 "evidence": [e.split(" (Source:")[0] for e in evidence],  # Cleaned evidence
+                 "evidence": [e.split(" (Source:")[0] for e in evidence],
                "reasoning": "Claim is too vague or lacks sufficient evidence"
            }

-         # LLM verification with distance-aware prompt
        evidence_str = "\n".join([f"- {e}" for e in evidence])
-         prompt = f""" You are a powerful fact checker. Analyze the claim below against the provided verified information.
-         Relying on the similarity scores, also carefully check whether all factual details in the claim (such as dates, names, locations, and events) exactly match atleast one of the evidence. If from first evidence, evidence is not sufficient, use the next evidence to verify the claim.
+         prompt = f"""You are a powerful fact checker. Analyze the claim below against the provided verified information.
+         Relying on the similarity scores, also carefully check whether all factual details in the claim (such as dates, names, locations, and events) exactly match the evidence.
If there is any factual mismatch (for example, the date in the claim is different from the evidence), classify the claim as False. Any factual mismatch, even if the overall context is similar, should lead to a False classification.
If the evidence is too vague or lacks strong matches, classify as Unverifiable.
If evidence directly contradicts the claim, classify as False.
@@ -113,7 +122,6 @@ Evidence (with similarity scores):

Guidelines:
1. Give more weight to evidence with higher similarity scores, but do not ignore factual mismatches.
- 2. If any one piece of evidence independently supports the claim, without factual mismatches, classify as True.
2. Pay close attention to details such as dates, names, locations, and events.
3. If the claim and evidence differ on any factual point, do not classify as True.
4. Respond only in JSON format without any additional text.
@@ -127,23 +135,14 @@ Respond in JSON format:
    "reasoning": "Explanation of the verdict based on evidence and factual details"
}}
"""
-
-
        completion = self.groq_client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=400
        )
-
-         # Process response
        response_content = completion.choices[0].message.content
-         print(f"Response from Groq: {response_content}")
-
-         # Use the robust JSON extractor
        parsed = robust_json_extractor(response_content)
-         print(f"Parsed JSON: {parsed}")
-
        if "error" in parsed:
            return {
                "error": parsed["error"],
@@ -151,7 +150,6 @@ Respond in JSON format:
                "raw_response": parsed.get("raw", response_content)
            }
        else:
-             # Validate required fields
            required_keys = ["verdict", "evidence", "reasoning"]
            if all(key in parsed for key in required_keys):
                return {
@@ -165,4 +163,113 @@ Respond in JSON format:
                    "error": f"Missing required keys: {[k for k in required_keys if k not in parsed]}",
                    "confidence": confidence,
                    "raw_response": response_content
-                 }
+                 }
+
+     def verify_single_entity(self, entity_text, confidence_threshold=0.5):
+         """Verify a single named entity against the fact database"""
+         # Vector similarity search
+         results = self.collection.query(
+             query_texts=[entity_text],
+             n_results=3,
+             include=["documents", "metadatas", "distances"]
+         )
+
+         # Process evidence with similarity normalization
+         evidence = []
+         total_distance = 0
+         for doc, meta, distance in zip(results['documents'][0],
+                                        results['metadatas'][0],
+                                        results['distances'][0]):
+             similarity = 1 - (distance / 2)  # Convert cosine distance to similarity
+             evidence.append({
+                 "text": doc,
+                 "source": meta.get("source", "Unknown"),
+                 "similarity": similarity
+             })
+             total_distance += distance
+
+         avg_similarity = 1 - (total_distance / len(results['distances'][0]) / 2)
+
+         # Prepare LLM verification prompt
+         evidence_str = "\n".join([
+             f"- {e['text']} (Similarity: {e['similarity']:.2f})"
+             for e in evidence
+         ])
+
+         prompt = f"""**Entity Verification Task**
+         Entity: "{entity_text}"
+
+         **Verified Evidence:**
+         {evidence_str}
+
+         **Instructions:**
+         1. Verify if this entity exists in official records
+         2. Check for exact matches of names/titles
+         3. Confirm associated details (locations, dates, roles)
+         4. Return JSON with: verdict (True/False/Unverified), confidence (0-1), reasoning
+
+         **JSON Response:"""
+
+         try:
+             response = self.groq_client.chat.completions.create(
+                 model=self.model_name,
+                 messages=[{"role": "user", "content": prompt}],
+                 temperature=0.2,
+                 response_format={"type": "json_object"}
+             )
+
+             result = json.loads(response.choices[0].message.content)
+             return {
+                 "verdict": result.get("verdict", "Unverified"),
+                 "confidence": min(max(result.get("confidence", avg_similarity), 0), 1),
+                 "evidence": [e["text"] for e in evidence],
+                 "reasoning": result.get("reasoning", "No reasoning provided")
+             }
+
+         except Exception as e:
+             return {
+                 "verdict": "Error",
+                 "confidence": 0,
+                 "evidence": [],
+                 "reasoning": f"Verification failed: {str(e)}"
+             }
+
+     def verify_claim(self, text, confidence_threshold=0.5):
+         """
+         Main method: takes input text, extracts entities and claims,
+         verifies each, and returns JSON results
+         """
+         # Extract entities and claims
+         entities = self.extract_entities(text)
+         claims = self.extract_claims(text)
+
+         # Verify claims
+         claim_results = []
+         for claim in claims:
+             verification = self.verify_single_claim(claim, confidence_threshold)
+             claim_results.append({
+                 "claim": claim,
+                 "verdict": verification.get("verdict", "Error"),
+                 "confidence": verification.get("confidence", 0),
+                 "evidence": verification.get("evidence", []),
+                 "reasoning": verification.get("reasoning", "Analysis failed")
+             })
+
+         # Verify entities
+         entity_results = []
+         for entity_text, entity_label in entities:
+             verification = self.verify_single_entity(entity_text, confidence_threshold)
+             entity_results.append({
+                 "entity": entity_text,
+                 "type": entity_label,
+                 "verdict": verification.get("verdict", "Error"),
+                 "confidence": verification.get("confidence", 0),
+                 "evidence": verification.get("evidence", []),
+                 "reasoning": verification.get("reasoning", "Analysis failed")
+             })
+
+         return {
+             "entities": entity_results,
+             "claims": claim_results
+         }
+
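
A short usage sketch (not part of the commit) of the reworked pipeline in fact_checker.py. It assumes an already-constructed FactChecker instance, since the constructor (collection, groq_client, etc.) is outside this diff, and the input text is illustrative:

# Hypothetical usage of the new entity + claim pipeline.
# `checker` is assumed to be a FactChecker built as elsewhere in the repo;
# its constructor arguments are not shown in this diff.
text = "Illustrative input sentence containing an entity and a factual claim."

result = checker.verify_claim(text)  # now returns {"entities": [...], "claims": [...]}

for ent in result["entities"]:
    # each entry: entity, type, verdict, confidence, evidence, reasoning
    print(f"{ent['entity']} ({ent['type']}): {ent['verdict']} ({ent['confidence']:.2f})")

for cl in result["claims"]:
    print(f"{cl['claim']} -> {cl['verdict']} ({cl['confidence']:.2f})")

Note that verify_claim no longer checks a single claim directly; it fans out to verify_single_claim and verify_single_entity, which is why the Streamlit code in app.py now renders separate entity and claim sections.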
visualize.ipynb ADDED
The diff for this file is too large to render. See raw diff