xavierbarbier committed on
Commit
0bc15db
·
verified ·
1 Parent(s): ee2bff4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -60,6 +60,25 @@ def get_text_embedding(text):
60
 
61
  return embeddings.embed_query(text)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def extract_text(file):
65
 
@@ -77,15 +96,9 @@ def extract_text(file):
77
 
78
  return text
79
 
80
- def qa(text, question):
81
-
82
- chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
83
 
84
- text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
85
-
86
- d = text_embeddings.shape[1]
87
- index = faiss.IndexFlatL2(d)
88
- index.add(text_embeddings)
89
 
90
  question_embeddings = np.array([get_text_embedding(question)])
91
 
@@ -104,16 +117,18 @@ def qa(text, question):
104
 
105
  return prompt
106
 
 
 
 
107
  with gr.Blocks() as demo:
108
- file_input = gr.File(label="Upload a PDF file")
109
- question_input = gr.Textbox(label="Question")
110
- text_output = gr.Textbox(label="Extracted Text")
111
 
 
 
 
112
  promp_output = gr.Textbox(label="prompt")
113
 
114
 
115
- file_input.upload(extract_text, inputs=file_input, outputs=text_output)
116
- text_output.change(qa,[text_output,question_input],promp_output)
117
 
118
 
119
 
 
60
 
61
  return embeddings.embed_query(text)
62
 
63
+ reader = PdfReader("/resource/NGAP 01042024.pdf")
64
+
65
+ text = []
66
+ for p in np.arange(0, len(reader.pages), 1):
67
+ page = reader.pages[int(p)]
68
+
69
+ # extracting text from page
70
+ text.append(page.extract_text())
71
+
72
+ text = ' '.join(text)
73
+
74
+ chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
75
+
76
+ text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
77
+
78
+ d = text_embeddings.shape[1]
79
+ index = faiss.IndexFlatL2(d)
80
+ index.add(text_embeddings)
81
+
82
 
83
  def extract_text(file):
84
 
 
96
 
97
  return text
98
 
99
+ def qa(question):
 
 
100
 
101
+
 
 
 
 
102
 
103
  question_embeddings = np.array([get_text_embedding(question)])
104
 
 
117
 
118
  return prompt
119
 
120
+ def test_func(text):
121
+ return len(text_embeddings)
122
+
123
  with gr.Blocks() as demo:
 
 
 
124
 
125
+ question_input = gr.Textbox(label="Question")
126
+ qa_button = gr.Button("Click to qa")
127
+
128
  promp_output = gr.Textbox(label="prompt")
129
 
130
 
131
+ qa_button.click(test_func, question_input, promp_output)
 
132
 
133
 
134