Adrien committed on
Commit
bccb279
·
1 Parent(s): 3823e3e
Files changed (1) hide show
  1. rag_demo/rag/source_annotator.py +2 -4
rag_demo/rag/source_annotator.py CHANGED
@@ -11,6 +11,7 @@ from transformers import pipeline
11
 
12
  class SourceAnnotator:
13
  def __init__(self):
 
14
  self.source_annotator = pipeline(
15
  "question-answering",
16
  model="distilbert/distilbert-base-cased-distilled-squad",
@@ -22,7 +23,7 @@ class SourceAnnotator:
22
  for sentence in sentences:
23
  scores = []
24
  for chunk in reranked_chunks:
25
- score = self.annotate_source(sentence.lower(), chunk.content.lower())
26
  score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
27
  score["chunk_id"] = chunk.chunk_id
28
  scores.append(score)
@@ -38,6 +39,3 @@ class SourceAnnotator:
38
  pattern = r"(?<=[.!?])\s+(?=[A-Z])"
39
  sentences = re.split(pattern, text)
40
  return [s.strip() for s in sentences if s.strip()]
41
-
42
- def annotate_source(self, text: str, chunk: str) -> dict:
43
- return self.source_annotator(text, chunk)
 
11
 
12
  class SourceAnnotator:
13
  def __init__(self):
14
+ # Extractive question answering model
15
  self.source_annotator = pipeline(
16
  "question-answering",
17
  model="distilbert/distilbert-base-cased-distilled-squad",
 
23
  for sentence in sentences:
24
  scores = []
25
  for chunk in reranked_chunks:
26
+ score = self.source_annotator(sentence.lower(), chunk.content.lower())
27
  score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
28
  score["chunk_id"] = chunk.chunk_id
29
  scores.append(score)
 
39
  pattern = r"(?<=[.!?])\s+(?=[A-Z])"
40
  sentences = re.split(pattern, text)
41
  return [s.strip() for s in sentences if s.strip()]