tsrivallabh committed on
Commit
149817f
·
verified ·
1 Parent(s): 355e6f9

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. fact_checker.py +10 -9
Dockerfile CHANGED
@@ -17,7 +17,7 @@ COPY requirements.txt .
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Add spaCy model download
20
- RUN python -m spacy download en_core_web_sm # Critical fix [1][2]
21
 
22
  COPY . .
23
 
 
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Add spaCy model download
20
+ RUN python -m spacy download en_core_web_sm
21
 
22
  COPY . .
23
 
fact_checker.py CHANGED
@@ -62,19 +62,19 @@ class FactChecker:
62
  self.ner = spacy.load("en_core_web_sm")
63
 
64
 
65
- self.claim_tokenizer = T5Tokenizer.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
66
- self.claim_model = T5ForConditionalGeneration.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
67
 
68
  def extract_entities(self, text):
69
  doc = self.ner(text)
70
  return [(ent.text, ent.label_) for ent in doc.ents]
71
 
72
  def extract_claims(self, text, threshold=0.5):
73
- tok_input = self.claim_tokenizer.batch_encode_plus([text], return_tensors="pt", padding=True)
74
- outputs = self.claim_model.generate(**tok_input)
75
- claims = self.claim_tokenizer.batch_decode(outputs, skip_special_tokens=True)
76
- claims = [claim.strip() for claim in claims if len(claim.strip()) > 0]
77
- return claims
78
 
79
 
80
  def verify_single_claim(self, claim, confidence_threshold=0.5):
@@ -106,8 +106,8 @@ class FactChecker:
106
  }
107
 
108
  evidence_str = "\n".join([f"- {e}" for e in evidence])
109
- prompt = f"""You are a powerful fact checker. Analyze the claim below against the provided verified information.
110
- Relying on the similarity scores, also carefully check whether all factual details in the claim (such as dates, names, locations, and events) exactly match the evidence.
111
  If there is any factual mismatch (for example, the date in the claim is different from the evidence), classify the claim as False. Any factual mismatch, even if the overall context is similar, should lead to a False classification.
112
  If the evidence is too vague or lacks strong matches, classify as Unverifiable.
113
  If evidence directly contradicts the claim, classify as False.
@@ -122,6 +122,7 @@ Evidence (with similarity scores):
122
 
123
  Guidelines:
124
  1. Give more weight to evidence with higher similarity scores, but do not ignore factual mismatches.
 
125
  2. Pay close attention to details such as dates, names, locations, and events.
126
  3. If the claim and evidence differ on any factual point, do not classify as True.
127
  4. Respond only in JSON format without any additional text.
 
62
  self.ner = spacy.load("en_core_web_sm")
63
 
64
 
65
+ # self.claim_tokenizer = T5Tokenizer.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
66
+ # self.claim_model = T5ForConditionalGeneration.from_pretrained("Babelscape/t5-base-summarization-claim-extractor")
67
 
68
  def extract_entities(self, text):
69
  doc = self.ner(text)
70
  return [(ent.text, ent.label_) for ent in doc.ents]
71
 
72
  def extract_claims(self, text, threshold=0.5):
73
+ # tok_input = self.claim_tokenizer.batch_encode_plus([text], return_tensors="pt", padding=True)
74
+ # outputs = self.claim_model.generate(**tok_input)
75
+ # claims = self.claim_tokenizer.batch_decode(outputs, skip_special_tokens=True)
76
+ # claims = [claim.strip() for claim in claims if len(claim.strip()) > 0]
77
+ return text
78
 
79
 
80
  def verify_single_claim(self, claim, confidence_threshold=0.5):
 
106
  }
107
 
108
  evidence_str = "\n".join([f"- {e}" for e in evidence])
109
+ prompt = f""" You are a powerful fact checker. Analyze the claim below against the provided verified information.
110
+ Relying on the similarity scores, also carefully check whether all factual details in the claim (such as dates, names, locations, and events) exactly match atleast one of the evidence. If from first evidence, evidence is not sufficient, use the next evidence to verify the claim.
111
  If there is any factual mismatch (for example, the date in the claim is different from the evidence), classify the claim as False. Any factual mismatch, even if the overall context is similar, should lead to a False classification.
112
  If the evidence is too vague or lacks strong matches, classify as Unverifiable.
113
  If evidence directly contradicts the claim, classify as False.
 
122
 
123
  Guidelines:
124
  1. Give more weight to evidence with higher similarity scores, but do not ignore factual mismatches.
125
+ 2. If any one piece of evidence independently supports the claim, without factual mismatches, classify as True.
126
  2. Pay close attention to details such as dates, names, locations, and events.
127
  3. If the claim and evidence differ on any factual point, do not classify as True.
128
  4. Respond only in JSON format without any additional text.