File size: 2,520 Bytes
e3bed55
f194277
 
dfa8313
e3bed55
bacffa7
6acdb94
05d8294
faa3c94
49adf14
aa78f0c
49adf14
 
faa3c94
 
 
3382339
 
 
05d8294
 
 
 
 
3382339
e3bed55
 
 
9c47e63
05d8294
e3bed55
 
 
0dc4a78
641ffd1
e3bed55
 
 
 
bacffa7
1cfe063
e221301
6acdb94
b189c38
df96733
e3bed55
ce79ee6
 
 
0cb6c65
3a7ceb5
723a5a6
88a5426
 
 
 
 
 
ce79ee6
 
 
 
1cfe063
ce79ee6
0764266
ce79ee6
0764266
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
# import PyPDF2
import fitz
import io



def search_pdf(pdf_file, split_search):
    """Return the pages of a PDF whose text contains every search term.

    Args:
        pdf_file: Either an ``io.BytesIO`` instance (or subclass), whose
            content is read via ``getvalue()`` and opened as an in-memory
            PDF stream, or any other value, which is handed to
            ``fitz.open`` unchanged (e.g. a path to a local file).
        split_search: Sequence of strings. A page matches only when every
            one of them occurs in the page text as a case-insensitive
            substring. An empty sequence therefore matches every page.

    Returns:
        A list of ``(page_number, cleaned_text)`` tuples in document order.
        ``page_number`` is 1-based and ``cleaned_text`` is the page text
        with each line stripped and blank lines removed.
    """
    if isinstance(pdf_file, io.BytesIO):  # in-memory upload (Streamlit case)
        doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
    else:  # local file case
        doc = fitz.open(pdf_file)

    # Lower-case the terms once instead of once per page.
    needles = [term.lower() for term in split_search]
    search_results = []

    # The context manager closes the document even if text extraction
    # raises; previously the document was never closed.
    with doc:
        for page_num in range(doc.page_count):
            text = doc.load_page(page_num).get_text()
            # Strip every line and drop the ones that end up empty.
            stripped_lines = (line.strip() for line in text.split('\n'))
            cleaned_text = '\n'.join(line for line in stripped_lines if line)
            # Lower-case the page once, not once per term.
            haystack = cleaned_text.lower()
            if all(needle in haystack for needle in needles):
                search_results.append((page_num + 1, cleaned_text))
    return search_results

def final_result(pdf_file, search_term):
    """Format the pages that match ``search_term`` as one text report.

    The search term is split on single spaces and the resulting pieces are
    passed to ``search_pdf``. Every matching page contributes one section
    holding the original search term, the page number and the page text.

    Args:
        pdf_file: The PDF to search; forwarded to ``search_pdf`` as-is.
        search_term: The raw search string entered by the user.

    Returns:
        The concatenated sections for all matching pages, or a
        "No results found" message when no page matched.
    """
    terms = search_term.split(' ')
    matches = search_pdf(pdf_file, terms)

    if not matches:
        return f"No results found for '{search_term}'."

    return "".join(
        f"'{search_term}' on page {page_no}:\n-{page_text}\n\n"
        for page_no, page_text in matches
    )
    
# ---- Page setup: wide layout, centred heading, horizontal rule ----
st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(
    "<h3 style='text-align:center; font-size:24px;'>Search in PDF</h3>",
    unsafe_allow_html=True,
)
st.write("---")

# Left column (40%) holds the inputs, right column (60%) shows the output.
col1, col2 = st.columns(spec=[0.4, 0.6])

with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(label="Enter Search-term", placeholder="Search here...")
    # Two equal sub-columns; only the first is used, for the Submit button.
    # (col4 is currently empty; a "Clear" button was once sketched for it.)
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")

with col2:
    # Only react when Submit was pressed on this run.
    if all_data:
        if input_file is None:
            st.error("Please upload a PDF file")
        elif not search_term.strip():
            st.error("Please enter a search term")
        else:
            # Both inputs are present: run the search and show the report.
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)