rvritesh167 committed on
Commit
1f70f29
·
1 Parent(s): 67927c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import streamlit as st
 
 
 
2
  from langchain.document_loaders import UnstructuredFileLoader
3
 
 
4
  def main():
5
  st.title("PDF Text Extractor")
6
 
@@ -8,13 +12,24 @@ def main():
8
 
9
  if uploaded_file is not None:
10
  st.subheader("PDF Content:")
11
- loader = UnstructuredFileLoader(uploaded_file)
 
 
 
 
 
 
12
  data = loader.load()
13
  txt = ''
14
  for item in data:
15
  txt += item.page_content
16
  text_content = txt
17
- st.text(text_content)
 
 
 
 
 
18
 
19
  if __name__ == "__main__":
20
- main()
 
1
  import streamlit as st
2
+ import tempfile
3
+ import os
4
+ import pyperclip
5
  from langchain.document_loaders import UnstructuredFileLoader
6
 
7
+
8
  def main():
9
  st.title("PDF Text Extractor")
10
 
 
12
 
13
  if uploaded_file is not None:
14
  st.subheader("PDF Content:")
15
+ st.text("Extracted using langchain:")
16
+
17
+ temp_file_path = os.path.join(tempfile.gettempdir(), f"{uploaded_file.name}")
18
+ with open(temp_file_path, "wb") as temp_file:
19
+ temp_file.write(uploaded_file.read())
20
+
21
+ loader = UnstructuredFileLoader(temp_file_path)
22
  data = loader.load()
23
  txt = ''
24
  for item in data:
25
  txt += item.page_content
26
  text_content = txt
27
+ if st.button("Copy to Clipboard"):
28
+ pyperclip.copy(text_content)
29
+ st.success("Text copied to clipboard!")
30
+ st.text_area("Extracted Text:", value=text_content, height=300)
31
+
32
+
33
 
34
  if __name__ == "__main__":
35
+ main()