Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files- scrape_3gpp.py +9 -7
scrape_3gpp.py
CHANGED
@@ -437,17 +437,19 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
|
|
437 |
lines = text.split("\n")
|
438 |
print(f"This is the lines : {lines}")
|
439 |
keyword = ["objective", "introduction", "summary", "scope"]
|
440 |
-
|
|
|
441 |
print(line)
|
|
|
442 |
if len(line) < 20:
|
443 |
for key in keyword:
|
444 |
line = line.lower()
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
for r in tabLine:
|
452 |
extracted_content.append(f'PDF Page number {r[0]} extracted text from the KEYWORD {r[2]} : \n')
|
453 |
extracted_content.append(' '.join(r[1]))
|
|
|
437 |
lines = text.split("\n")
|
438 |
print(f"This is the lines : {lines}")
|
439 |
keyword = ["objective", "introduction", "summary", "scope"]
|
440 |
+
|
441 |
+
for indexPdf,line in enumerate(lines):
|
442 |
print(line)
|
443 |
+
|
444 |
if len(line) < 20:
|
445 |
for key in keyword:
|
446 |
line = line.lower()
|
447 |
+
|
448 |
+
if key in line:
|
449 |
+
selectedText = lines[indexPdf:]
|
450 |
+
tabLine.append([pdfPage,selectedText,key])
|
451 |
+
print(f"Selected line in keywords is: {line}")
|
452 |
+
|
453 |
for r in tabLine:
|
454 |
extracted_content.append(f'PDF Page number {r[0]} extracted text from the KEYWORD {r[2]} : \n')
|
455 |
extracted_content.append(' '.join(r[1]))
|