Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -38,21 +38,26 @@ def extract_text_from_txt(file_path):
|
|
38 |
return txt_file.read()
|
39 |
# return text
|
40 |
|
41 |
-
def extract_text_from_doc(file_path):
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
|
|
48 |
doc = docx.Document(file_path)
|
49 |
-
|
50 |
-
|
51 |
-
fullText.append(para.text)
|
52 |
-
return '\n'.join(fullText)
|
53 |
|
54 |
-
|
55 |
|
|
|
|
|
56 |
def model(model_name):
|
57 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
58 |
model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
|
|
|
38 |
return txt_file.read()
|
39 |
# return text
|
40 |
|
41 |
+
# def extract_text_from_doc(file_path):
|
42 |
+
# doc = docx.Document(file_path)
|
43 |
+
# fullText = []
|
44 |
+
# for para in doc.paragraphs:
|
45 |
+
# fullText.append(para.text)
|
46 |
+
# return '\n'.join(fullText)
|
47 |
+
|
48 |
+
def extract_text_from_paragraph(para):
    """Return the ``text`` attribute of a single paragraph object.

    Args:
        para: Any object exposing a ``text`` attribute (the caller in this
            file passes entries of ``doc.paragraphs``).

    Returns:
        Whatever ``para.text`` evaluates to.
    """
    paragraph_text = para.text
    return paragraph_text
|
50 |
|
51 |
+
|
52 |
+
def extract_text_from_doc(file_path):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file_path: Location of the document; passed unchanged to
            ``docx.Document``.

    Returns:
        The ``text`` of every entry in ``doc.paragraphs``, in document
        order, joined with ``'\\n'``. A document with no paragraphs yields
        an empty string.
    """
    doc = docx.Document(file_path)
    # Reading ``para.text`` is in-memory work rather than blocking I/O, so a
    # thread pool only adds start-up and scheduling overhead under the GIL.
    # A plain sequential pass produces the same string in the same order and
    # needs no ``concurrent.futures`` import.
    return '\n'.join(para.text for para in doc.paragraphs)
|
58 |
|
59 |
+
|
60 |
+
|
61 |
def model(model_name):
|
62 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
63 |
model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
|