rvritesh167 committed on
Commit
a45b3b8
·
1 Parent(s): feb3592

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -1,18 +1,23 @@
1
- from langchain.document_loaders import UnstructuredFileLoader
2
  import streamlit as st
 
3
 
4
- def extract_text_from_pdf(file):
5
- loader = UnstructuredFileLoader(file)
6
- data=loader.load()
7
- txt=''
8
  for item in data:
9
- print(item.page_content)
10
- txt+=item.page_content
11
  return txt
12
 
13
- st.title("PDF Text Extractor")
14
- uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
15
- if uploaded_file is not None:
16
- st.subheader("PDF Content:")
17
- text_content = extract_text_from_pdf(uploaded_file)
18
- st.text(text_content)
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from langchain.document_loaders import UnstructuredFileLoader
3
 
4
+ def extract_text_from_pdf(uploaded_file):
5
+ loader = UnstructuredFileLoader(uploaded_file)
6
+ data = loader.load()
7
+ txt = ''
8
  for item in data:
9
+ txt += item.page_content
 
10
  return txt
11
 
12
+ def main():
13
+ st.title("PDF Text Extractor")
14
+
15
+ uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
16
+
17
+ if uploaded_file is not None:
18
+ st.subheader("PDF Content:")
19
+ text_content = extract_text_from_pdf(uploaded_file)
20
+ st.text(text_content)
21
+
22
+ if __name__ == "__main__":
23
+ main()