Extract_text_From_pdf

Runtime error

Extract_text_From_pdf / app.py

Update app.py

584d080 over 1 year ago

1.08 kB

	import streamlit as st
	import tempfile
	import os
	from langchain.document_loaders import UnstructuredFileLoader



	def main():
	    """Render the Streamlit page: upload a PDF and display its extracted text.

	    The uploaded bytes are written to a temporary file because the loader
	    is constructed from a filesystem path. The page content of every
	    document returned by the loader is concatenated and shown in a text
	    area. Nothing is rendered below the uploader until a file is provided.
	    """
	    st.title("PDF Text Extractor")

	    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
	    if uploaded_file is None:
	        return

	    st.subheader("PDF Content : ")

	    # The client-supplied file name is deliberately not used for the path:
	    # it is untrusted input and two sessions uploading the same name would
	    # overwrite each other's file. delete=False lets the loader reopen the
	    # file by path after this handle is closed.
	    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
	        # getvalue() returns the whole buffer regardless of stream position.
	        temp_file.write(uploaded_file.getvalue())
	        temp_file_path = temp_file.name

	    try:
	        data = UnstructuredFileLoader(temp_file_path).load()
	    finally:
	        # Always remove the temporary copy, even if parsing fails, so
	        # uploads do not accumulate in the temp directory.
	        os.remove(temp_file_path)

	    text_content = "".join(item.page_content for item in data)
	    st.text_area("Extracted Text:", value=text_content, height=300)



	# Script entry point: build the page only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()