Avanisha committed on
Commit
076dbb5
·
verified ·
1 Parent(s): 328ea6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -30,7 +30,16 @@ logger = setup_logging('app')
30
  nltk.download('punkt')
31
  nltk.download('punkt_tab')
32
  nltk.download('stopwords')
33
-
 
 
 
 
 
 
 
 
 
34
  # Load environment variables
35
  load_dotenv()
36
 
@@ -500,12 +509,12 @@ def is_query_relevant(question, source_documents, threshold=0.1):
500
  except Exception as e:
501
  logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
502
  return False
503
-
504
  def get_pdf_details(filename, page_number):
505
  """Get details of a specific PDF page."""
506
  logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
507
  try:
508
- data_path = "/home/bacancy/Documents/Company/Smart PDF Search/data"
 
509
  file_path = os.path.join(data_path, filename)
510
 
511
  # Open the PDF
@@ -565,7 +574,8 @@ def get_romanized_text(filename):
565
  """Get romanized text from a PDF."""
566
  logger.info(f"Processing romanized text for file: {filename}")
567
  try:
568
- data_path = "/home/bacancy/Documents/Company/Smart PDF Search/data"
 
569
  file_path = os.path.join(data_path, filename)
570
 
571
  # Open the PDF
@@ -607,6 +617,9 @@ def get_romanized_text(filename):
607
 
608
  def main():
609
  logger.info("Starting Smart PDF Search application")
 
 
 
610
 
611
  # Detect page from query parameters
612
  query_params = st.query_params
 
30
  nltk.download('punkt')
31
  nltk.download('punkt_tab')
32
  nltk.download('stopwords')
33
+
34
+ # Create directories if they don't exist
35
+ def create_dirs_if_needed():
36
+ """Create the necessary directories if they don't exist."""
37
+ os.makedirs('/tmp/data', exist_ok=True)
38
+ os.makedirs('/tmp/db', exist_ok=True)
39
+
40
+ # Call the function at the start of your app
41
+ create_dirs_if_needed()
42
+
43
  # Load environment variables
44
  load_dotenv()
45
 
 
509
  except Exception as e:
510
  logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
511
  return False
 
512
  def get_pdf_details(filename, page_number):
513
  """Get details of a specific PDF page."""
514
  logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
515
  try:
516
+ # Update the paths to point to /tmp for Hugging Face Space
517
+ data_path = '/tmp/data'
518
  file_path = os.path.join(data_path, filename)
519
 
520
  # Open the PDF
 
574
  """Get romanized text from a PDF."""
575
  logger.info(f"Processing romanized text for file: {filename}")
576
  try:
577
+ # Update the paths to point to /tmp for Hugging Face Space
578
+ data_path = '/tmp/data'
579
  file_path = os.path.join(data_path, filename)
580
 
581
  # Open the PDF
 
617
 
618
  def main():
619
  logger.info("Starting Smart PDF Search application")
620
+
621
+ # Ensure directories are created before file processing starts
622
+ create_dirs_if_needed()
623
 
624
  # Detect page from query parameters
625
  query_params = st.query_params