vprzybylo committed on
Commit
5f01196
·
1 Parent(s): 5135378

Enhance PDF path resolution in initialize_rag function to support multiple possible locations

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -110,12 +110,33 @@ def initialize_rag():
110
  logger.info("Using cached RAG chain from session state")
111
  return st.session_state.rag_chain
112
 
113
- data_path = "app/data/raw/grid_code.pdf"
114
- if not os.path.exists(data_path):
115
- raise FileNotFoundError(f"PDF not found: {data_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  with st.spinner("Loading Grid Code documents..."):
118
- loader = GridCodeLoader(str(data_path), pages=17)
119
  documents = loader.load_and_split()
120
  logger.info(f"Loaded {len(documents)} document chunks")
121
 
 
110
  logger.info("Using cached RAG chain from session state")
111
  return st.session_state.rag_chain
112
 
113
+ # Try multiple possible paths for the PDF
114
+ possible_paths = [
115
+ "app/data/raw/grid_code.pdf", # Local path
116
+ "/app/app/data/raw/grid_code.pdf", # Docker path
117
+ Path(__file__).parent
118
+ / "app"
119
+ / "data"
120
+ / "raw"
121
+ / "grid_code.pdf", # Absolute path
122
+ ]
123
+
124
+ data_path = None
125
+ for path in possible_paths:
126
+ if isinstance(path, str):
127
+ path = Path(path)
128
+ if path.exists():
129
+ data_path = str(path)
130
+ logger.info(f"Found PDF at: {data_path}")
131
+ break
132
+
133
+ if not data_path:
134
+ raise FileNotFoundError(
135
+ f"PDF not found in any of these locations: {possible_paths}"
136
+ )
137
 
138
  with st.spinner("Loading Grid Code documents..."):
139
+ loader = GridCodeLoader(data_path, pages=17)
140
  documents = loader.load_and_split()
141
  logger.info(f"Loaded {len(documents)} document chunks")
142