File size: 1,079 Bytes
71dfa88
1f70f29
 
a45b3b8
d9a2140
12e6f31
1f70f29
a45b3b8
 
a0cce57
a45b3b8
a0cce57
a45b3b8
584d080
 
1f70f29
 
 
 
a0cce57
 
 
 
 
6800956
 
 
1f70f29
b685471
6800956
1f70f29
 
a45b3b8
 
1f70f29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import streamlit as st
import tempfile
import os
from langchain.document_loaders import UnstructuredFileLoader



def main():
    """Render the PDF text-extractor page.

    Shows a file uploader restricted to PDFs. Once a file is uploaded, its
    bytes are spooled to a private temporary file (the loader takes a path),
    parsed with ``UnstructuredFileLoader``, and the concatenated
    ``page_content`` of every returned document is displayed in a text area.
    """
    st.title("PDF Text Extractor")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        # Nothing uploaded yet; only the title and uploader are rendered.
        return

    st.subheader("PDF Content : ")

    # Use a uniquely named temp file rather than <tmpdir>/<client filename>:
    # the client-supplied name could collide with another session's upload
    # (or contain unexpected path components). Only the extension is kept,
    # in case the loader relies on it to detect the file type.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".pdf"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        temp_file.write(uploaded_file.getvalue())
        temp_file_path = temp_file.name

    # The file is closed before loading (delete=False) so it can be reopened
    # by path on every platform; it is removed here even if parsing fails.
    try:
        documents = UnstructuredFileLoader(temp_file_path).load()
    finally:
        os.remove(temp_file_path)

    text_content = "".join(doc.page_content for doc in documents)
    st.text_area("Extracted Text:", value=text_content, height=300)



# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()