Spaces:
Sleeping
Sleeping
File size: 2,520 Bytes
e3bed55 f194277 dfa8313 e3bed55 bacffa7 6acdb94 05d8294 faa3c94 49adf14 aa78f0c 49adf14 faa3c94 3382339 05d8294 3382339 e3bed55 9c47e63 05d8294 e3bed55 0dc4a78 641ffd1 e3bed55 bacffa7 1cfe063 e221301 6acdb94 b189c38 df96733 e3bed55 ce79ee6 0cb6c65 3a7ceb5 723a5a6 88a5426 ce79ee6 1cfe063 ce79ee6 0764266 ce79ee6 0764266 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import streamlit as st
# import PyPDF2
import fitz
import io
def search_pdf(pdf_file, split_search):
    """Find the pages of a PDF whose text contains every search term.

    Args:
        pdf_file: Either an ``io.BytesIO`` holding the PDF bytes (the
            Streamlit upload case) or any value ``fitz.open`` accepts as
            its first argument (the local-file case).
        split_search: Iterable of search terms. Each term is matched as a
            case-insensitive substring of the page text.

    Returns:
        A list of ``(page_number, cleaned_text)`` tuples, one per page on
        which *all* terms occur. ``page_number`` is 1-based and
        ``cleaned_text`` is the page text with every line stripped and
        blank lines removed. An empty ``split_search`` matches every page.
    """
    if isinstance(pdf_file, io.BytesIO):  # Handling Streamlit case
        doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
    else:  # Handling local file case
        doc = fitz.open(pdf_file)

    # Lower-case the terms once instead of once per page.
    needles = [term.lower() for term in split_search]
    search_results = []

    # The document is a context manager; leaving the block closes it so the
    # underlying file handle / buffer is released even if a page fails.
    with doc:
        for page_num in range(doc.page_count):
            text = doc.load_page(page_num).get_text()
            # Split the text into lines and filter out empty lines
            lines = [s for s in map(str.strip, text.split('\n')) if s]
            cleaned_text = '\n'.join(lines)
            haystack = cleaned_text.lower()
            if all(needle in haystack for needle in needles):
                search_results.append((page_num + 1, cleaned_text))
    return search_results
def final_result(pdf_file, search_term):
    """Search a PDF for a phrase and format the matches as display text.

    The phrase is split on single spaces and handed to ``search_pdf``; a
    page is reported only when that function returns it.

    Args:
        pdf_file: The PDF source, passed through to ``search_pdf`` unchanged.
        search_term: The raw phrase typed by the user.

    Returns:
        One ``'<term>' on page <n>:`` section per matching page, each
        followed by the page text and a blank line, or a single
        "No results found" sentence when nothing matched.
    """
    hits = search_pdf(pdf_file, search_term.split(' '))
    if not hits:
        return f"No results found for '{search_term}'."

    # Build the report in one pass rather than growing a string with +=.
    return "".join(
        f"'{search_term}' on page {number}:\n-{page_text}\n\n"
        for number, page_text in hits
    )
# ---- Page chrome -----------------------------------------------------------
st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(
    "<h3 style='text-align:center; font-size:24px;'>Search in PDF</h3>",
    unsafe_allow_html=True,
)
st.write("---")

# Left column holds the inputs, right column shows the outcome.
col1, col2 = st.columns(spec=[0.4, 0.6])

with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(
        label="Enter Search-term",
        placeholder="Search here...",
    )
    # NOTE(review): the nesting of this row inside col1 was reconstructed
    # from statement order after indentation was lost; confirm the layout.
    # col4 is currently unused (a "Clear" button was once sketched for it).
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")

with col2:
    # Nothing is rendered until the user presses Submit.
    if all_data:
        if input_file is None:
            st.error("Please upload a PDF file")
        elif search_term.strip() == "":
            st.error("Please enter a search term")
        else:
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)