Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
import fitz # PyMuPDF
|
3 |
from docx import Document
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
from langchain_community.vectorstores import FAISS
|
@@ -9,6 +8,13 @@ from nltk.tokenize import sent_tokenize
|
|
9 |
import torch
|
10 |
import gradio as gr
|
11 |
import pickle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Function to extract text from a PDF file
|
14 |
def extract_text_from_pdf(pdf_path):
|
@@ -140,3 +146,4 @@ demo.launch()
|
|
140 |
|
141 |
|
142 |
|
|
|
|
1 |
import os
|
|
|
2 |
from docx import Document
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
from langchain_community.vectorstores import FAISS
|
|
|
8 |
import torch
|
9 |
import gradio as gr
|
10 |
import pickle
|
11 |
+
import nltk
|
12 |
+
|
13 |
+
# Download NLTK punkt resource if not already downloaded
|
14 |
+
try:
|
15 |
+
nltk.data.find('tokenizers/punkt')
|
16 |
+
except LookupError:
|
17 |
+
nltk.download('punkt')
|
18 |
|
19 |
# Function to extract text from a PDF file
|
20 |
def extract_text_from_pdf(pdf_path):
|
|
|
146 |
|
147 |
|
148 |
|
149 |
+
|