Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
vprzybylo
committed on
Commit
·
5f01196
1
Parent(s):
5135378
Enhance PDF path resolution in initialize_rag function to support multiple possible locations
Browse files
app.py
CHANGED
@@ -110,12 +110,33 @@ def initialize_rag():
|
|
110 |
logger.info("Using cached RAG chain from session state")
|
111 |
return st.session_state.rag_chain
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
with st.spinner("Loading Grid Code documents..."):
|
118 |
-
loader = GridCodeLoader(
|
119 |
documents = loader.load_and_split()
|
120 |
logger.info(f"Loaded {len(documents)} document chunks")
|
121 |
|
|
|
110 |
logger.info("Using cached RAG chain from session state")
|
111 |
return st.session_state.rag_chain
|
112 |
|
113 |
+
# Try multiple possible paths for the PDF
|
114 |
+
possible_paths = [
|
115 |
+
"app/data/raw/grid_code.pdf", # Local path
|
116 |
+
"/app/app/data/raw/grid_code.pdf", # Docker path
|
117 |
+
Path(__file__).parent
|
118 |
+
/ "app"
|
119 |
+
/ "data"
|
120 |
+
/ "raw"
|
121 |
+
/ "grid_code.pdf", # Absolute path
|
122 |
+
]
|
123 |
+
|
124 |
+
data_path = None
|
125 |
+
for path in possible_paths:
|
126 |
+
if isinstance(path, str):
|
127 |
+
path = Path(path)
|
128 |
+
if path.exists():
|
129 |
+
data_path = str(path)
|
130 |
+
logger.info(f"Found PDF at: {data_path}")
|
131 |
+
break
|
132 |
+
|
133 |
+
if not data_path:
|
134 |
+
raise FileNotFoundError(
|
135 |
+
f"PDF not found in any of these locations: {possible_paths}"
|
136 |
+
)
|
137 |
|
138 |
with st.spinner("Loading Grid Code documents..."):
|
139 |
+
loader = GridCodeLoader(data_path, pages=17)
|
140 |
documents = loader.load_and_split()
|
141 |
logger.info(f"Loaded {len(documents)} document chunks")
|
142 |
|