Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,16 @@ logger = setup_logging('app')
|
|
30 |
nltk.download('punkt')
|
31 |
nltk.download('punkt_tab')
|
32 |
nltk.download('stopwords')
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# Load environment variables
|
35 |
load_dotenv()
|
36 |
|
@@ -500,12 +509,12 @@ def is_query_relevant(question, source_documents, threshold=0.1):
|
|
500 |
except Exception as e:
|
501 |
logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
|
502 |
return False
|
503 |
-
|
504 |
def get_pdf_details(filename, page_number):
|
505 |
"""Get details of a specific PDF page."""
|
506 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
507 |
try:
|
508 |
-
|
|
|
509 |
file_path = os.path.join(data_path, filename)
|
510 |
|
511 |
# Open the PDF
|
@@ -565,7 +574,8 @@ def get_romanized_text(filename):
|
|
565 |
"""Get romanized text from a PDF."""
|
566 |
logger.info(f"Processing romanized text for file: {filename}")
|
567 |
try:
|
568 |
-
|
|
|
569 |
file_path = os.path.join(data_path, filename)
|
570 |
|
571 |
# Open the PDF
|
@@ -607,6 +617,9 @@ def get_romanized_text(filename):
|
|
607 |
|
608 |
def main():
|
609 |
logger.info("Starting Smart PDF Search application")
|
|
|
|
|
|
|
610 |
|
611 |
# Detect page from query parameters
|
612 |
query_params = st.query_params
|
|
|
30 |
nltk.download('punkt')
|
31 |
nltk.download('punkt_tab')
|
32 |
nltk.download('stopwords')
|
33 |
+
|
34 |
+
# Create directories if they don't exist
|
35 |
+
def create_dirs_if_needed(paths=('/tmp/data', '/tmp/db')):
    """Create the application's working directories if they don't exist.

    Args:
        paths: A single directory path, or an iterable of directory
            paths, to create. Defaults to ``/tmp/data`` and ``/tmp/db``,
            the two directories this function has always created.

    Returns:
        None.

    Raises:
        OSError: Propagated from ``os.makedirs`` when a directory cannot
            be created (for example, the path exists but is not a
            directory).
    """
    # A bare string is itself iterable; wrap it so we create one directory
    # rather than one directory per character.
    if isinstance(paths, (str, bytes, os.PathLike)):
        paths = (paths,)

    for path in paths:
        # exist_ok=True makes repeated calls harmless: an existing
        # directory is left untouched instead of raising FileExistsError.
        os.makedirs(path, exist_ok=True)
|
39 |
+
|
40 |
+
# Create the working directories as soon as the module is loaded
|
41 |
+
create_dirs_if_needed()
|
42 |
+
|
43 |
# Load environment variables
|
44 |
load_dotenv()
|
45 |
|
|
|
509 |
except Exception as e:
|
510 |
logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
|
511 |
return False
|
|
|
512 |
def get_pdf_details(filename, page_number):
|
513 |
"""Get details of a specific PDF page."""
|
514 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
515 |
try:
|
516 |
+
# Update the paths to point to /tmp for Hugging Face Space
|
517 |
+
data_path = '/tmp/data'
|
518 |
file_path = os.path.join(data_path, filename)
|
519 |
|
520 |
# Open the PDF
|
|
|
574 |
"""Get romanized text from a PDF."""
|
575 |
logger.info(f"Processing romanized text for file: {filename}")
|
576 |
try:
|
577 |
+
# Update the paths to point to /tmp for Hugging Face Space
|
578 |
+
data_path = '/tmp/data'
|
579 |
file_path = os.path.join(data_path, filename)
|
580 |
|
581 |
# Open the PDF
|
|
|
617 |
|
618 |
def main():
|
619 |
logger.info("Starting Smart PDF Search application")
|
620 |
+
|
621 |
+
# Ensure directories are created before file processing starts
|
622 |
+
create_dirs_if_needed()
|
623 |
|
624 |
# Detect page from query parameters
|
625 |
query_params = st.query_params
|