updated app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,6 @@ from io import BytesIO
|
|
6 |
from PIL import Image
|
7 |
import pandas as pd
|
8 |
|
9 |
-
|
10 |
def extract_text_images(
|
11 |
pdf_path: str, output_folder: str,
|
12 |
minimum_font_size: int,
|
@@ -227,17 +226,13 @@ def main():
|
|
227 |
min_value=1, value=2
|
228 |
)
|
229 |
|
230 |
-
# Output folder path input
|
231 |
output_folder = st.text_input(
|
232 |
-
"Output folder path:"
|
233 |
-
os.path.join(os.getcwd(), "Extracted_Data")
|
234 |
)
|
235 |
|
236 |
-
# Convert the output folder to an absolute path
|
237 |
-
output_folder = os.path.abspath(os.path.expanduser(output_folder))
|
238 |
-
|
239 |
if st.button("Start Extraction"):
|
240 |
-
if pdf_file is not None:
|
241 |
# Save uploaded PDF to a temporary location
|
242 |
temp_pdf_path = os.path.join(output_folder, pdf_file.name)
|
243 |
with open(temp_pdf_path, "wb") as f:
|
@@ -257,11 +252,6 @@ def main():
|
|
257 |
# Convert data to XLSX
|
258 |
xlsx_data = convert_to_xlsx(extraction_data)
|
259 |
|
260 |
-
# Show a preview of the XLSX data (first 5 rows)
|
261 |
-
# st.subheader("XLSX Preview (First 5 Rows)")
|
262 |
-
df = pd.read_excel(xlsx_data, sheet_name='Extraction')
|
263 |
-
# st.dataframe(df.head())
|
264 |
-
|
265 |
col1, col2 = st.columns(2)
|
266 |
|
267 |
with col1:
|
@@ -279,7 +269,7 @@ def main():
|
|
279 |
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
|
280 |
|
281 |
else:
|
282 |
-
st.error("Please upload a PDF file.")
|
283 |
|
284 |
# Footer (Fixed Position)
|
285 |
st.markdown(
|
@@ -288,21 +278,24 @@ def main():
|
|
288 |
.footer {
|
289 |
position: fixed;
|
290 |
bottom: 0;
|
|
|
291 |
width: 100%;
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
298 |
}
|
299 |
</style>
|
300 |
<div class="footer">
|
301 |
-
|
302 |
</div>
|
303 |
""",
|
304 |
unsafe_allow_html=True
|
305 |
)
|
306 |
|
|
|
307 |
if __name__ == "__main__":
|
308 |
main()
|
|
|
6 |
from PIL import Image
|
7 |
import pandas as pd
|
8 |
|
|
|
9 |
def extract_text_images(
|
10 |
pdf_path: str, output_folder: str,
|
11 |
minimum_font_size: int,
|
|
|
226 |
min_value=1, value=2
|
227 |
)
|
228 |
|
229 |
+
# Output folder path input (full path provided by the user)
|
230 |
output_folder = st.text_input(
|
231 |
+
"Output folder path:"
|
|
|
232 |
)
|
233 |
|
|
|
|
|
|
|
234 |
if st.button("Start Extraction"):
|
235 |
+
if pdf_file is not None and output_folder:
|
236 |
# Save uploaded PDF to a temporary location
|
237 |
temp_pdf_path = os.path.join(output_folder, pdf_file.name)
|
238 |
with open(temp_pdf_path, "wb") as f:
|
|
|
252 |
# Convert data to XLSX
|
253 |
xlsx_data = convert_to_xlsx(extraction_data)
|
254 |
|
|
|
|
|
|
|
|
|
|
|
255 |
col1, col2 = st.columns(2)
|
256 |
|
257 |
with col1:
|
|
|
269 |
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
|
270 |
|
271 |
else:
|
272 |
+
st.error("Please upload a PDF file and provide an output folder path.")
|
273 |
|
274 |
# Footer (Fixed Position)
|
275 |
st.markdown(
|
|
|
278 |
.footer {
|
279 |
position: fixed;
|
280 |
bottom: 0;
|
281 |
+
left: 0;
|
282 |
width: 100%;
|
283 |
+
background-color: #F0F0F0;
|
284 |
+
font-family:cursive;
|
285 |
+
text-align: right;
|
286 |
+
padding: 5px 0;
|
287 |
+
font-size:20px;
|
288 |
+
font-weight: bold;
|
289 |
+
color: #FF0000;
|
290 |
}
|
291 |
</style>
|
292 |
<div class="footer">
|
293 |
+
CREATED BY: CHINMAY BHALERAO
|
294 |
</div>
|
295 |
""",
|
296 |
unsafe_allow_html=True
|
297 |
)
|
298 |
|
299 |
+
|
300 |
if __name__ == "__main__":
|
301 |
main()
|