Mary12 committed on
Commit
6bd0993
·
1 Parent(s): d9c2c2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -38,21 +38,26 @@ def extract_text_from_txt(file_path):
38
  return txt_file.read()
39
  # return text
40
 
41
- def extract_text_from_doc(file_path):
42
- # text = ""
43
- # doc = docx.Document(file_path)
44
- # for texts in doc.paragraphs:
45
- # text+= texts.text + "\n"
 
 
 
 
46
 
47
- # return texts
 
48
  doc = docx.Document(file_path)
49
- fullText = []
50
- for para in doc.paragraphs:
51
- fullText.append(para.text)
52
- return '\n'.join(fullText)
53
 
54
-
55
 
 
 
56
  def model(model_name):
57
  tokenizer = AutoTokenizer.from_pretrained(model_name)
58
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
 
38
  return txt_file.read()
39
  # return text
40
 
41
+ # def extract_text_from_doc(file_path):
42
+ # doc = docx.Document(file_path)
43
+ # fullText = []
44
+ # for para in doc.paragraphs:
45
+ # fullText.append(para.text)
46
+ # return '\n'.join(fullText)
47
+
48
+ def extract_text_from_paragraph(para):
49
+ return para.text
50
 
51
+
52
+ def extract_text_from_doc(file_path):
53
  doc = docx.Document(file_path)
54
+ with concurrent.futures.ThreadPoolExecutor() as executor:
55
+ results = list(executor.map(extract_text_from_paragraph, doc.paragraphs))
 
 
56
 
57
+ return '\n'.join(results)
58
 
59
+
60
+
61
  def model(model_name):
62
  tokenizer = AutoTokenizer.from_pretrained(model_name)
63
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)