xavierbarbier committed · verified
Commit 6e9cf31 · Parent(s): 49fdcd9

Update app.py

Files changed (1): app.py (+38 -1)
app.py CHANGED
@@ -39,6 +39,17 @@ model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': False}
+embeddings = HuggingFaceEmbeddings(
+
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
+)
+
+chunk_size = 2048
+
+
 # creating a pdf reader object
 
 print("Finish the model init process")
@@ -51,6 +62,10 @@ p = pipeline(
     model="impira/layoutlm-document-qa",
 )
 
+def get_text_embedding(text):
+
+    return embeddings.embed_query(text)
+
 def qa(question: str, doc: str) -> str:
 
     reader = PdfReader(doc)
@@ -64,8 +79,30 @@ def qa(question: str, doc: str) -> str:
 
     text = ' '.join(text)
 
+    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+
+    text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
+
+    d = text_embeddings.shape[1]
+    index = faiss.IndexFlatL2(d)
+    index.add(text_embeddings)
+
+    question_embeddings = np.array([get_text_embedding(question)])
+
+    D, I = index.search(question_embeddings, k=2)  # distance, index
+    retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
+
+    prompt = f"""
+    Context information is below.
+    ---------------------
+    {retrieved_chunk}
+    ---------------------
+    Given the context information and not prior knowledge, answer the query.
+    Query: {question}
+    Answer:
+    """
 
-    return text
+    return prompt
 
 
 demo = gr.Interface(
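For reference, a minimal, self-contained sketch of the retrieval pattern this commit introduces in qa() (chunk the extracted text, embed each chunk, index with FAISS, retrieve the chunks nearest to the question, fold them into a prompt). It calls sentence-transformers directly rather than the HuggingFaceEmbeddings wrapper used in app.py, and the model name, sample text, and retrieve_prompt helper are illustrative assumptions, not part of the commit.

# Sketch of the chunk -> embed -> FAISS -> prompt flow (assumed model name).
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
chunk_size = 2048  # same fixed character chunk size as the commit

def retrieve_prompt(text: str, question: str, k: int = 2) -> str:
    # Fixed-size character chunks, as in the commit
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # Embed chunks and the question; FAISS expects float32 arrays
    chunk_vecs = np.asarray(embedder.encode(chunks), dtype="float32")
    question_vec = np.asarray(embedder.encode([question]), dtype="float32")

    # Exact L2 index over the chunk embeddings
    index = faiss.IndexFlatL2(chunk_vecs.shape[1])
    index.add(chunk_vecs)

    # Retrieve the k nearest chunks and build the context prompt
    k = min(k, len(chunks))
    _, ids = index.search(question_vec, k)
    context = "\n".join(chunks[i] for i in ids[0])

    return (
        "Context information is below.\n"
        "---------------------\n"
        f"{context}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, answer the query.\n"
        f"Query: {question}\n"
        "Answer:\n"
    )

# Example use: print(retrieve_prompt(long_document_text, "What is this document about?"))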