Extract_text_From_pdf

Runtime error

Extract_text_From_pdf / app.py

Update app.py

d9a2140 over 1 year ago

1.12 kB

	import streamlit as st
	import tempfile
	import os
	# import pyperclip
	from langchain.document_loaders import UnstructuredFileLoader
	from clipboard import copy



	def main():
	    """Render the PDF text-extractor page.

	    Shows a PDF uploader. Once a file is uploaded, its bytes are written
	    to a private temporary directory, loaded with
	    ``UnstructuredFileLoader``, and the ``page_content`` of every returned
	    document is concatenated. The text is shown in a text area and a text
	    input, and a button offers to copy it via ``clipboard.copy``.
	    """
	    st.title("PDF Text Extractor")

	    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
	    if uploaded_file is None:
	        # Nothing to extract until the user provides a file.
	        return

	    st.subheader("PDF Content:")
	    st.text("Extracted using langchain:")

	    # Write the upload into a fresh, private directory under a fixed name
	    # instead of joining the client-supplied file name onto the shared temp
	    # dir: that name could contain path separators, and identical names
	    # from different sessions would overwrite each other. The directory
	    # (and the file in it) is removed as soon as loading finishes.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        temp_file_path = os.path.join(temp_dir, "upload.pdf")
	        with open(temp_file_path, "wb") as temp_file:
	            # getvalue() returns the full buffer regardless of the current
	            # stream position, unlike read().
	            temp_file.write(uploaded_file.getvalue())
	        documents = UnstructuredFileLoader(temp_file_path).load()

	    text_content = "".join(doc.page_content for doc in documents)

	    if st.button("Copy to Clipboard"):
	        # NOTE(review): copy() is called from this script's own process, so
	        # it presumably targets the clipboard of the machine running the
	        # app, not the visitor's browser - confirm. Any failure is
	        # reported on the page instead of aborting the whole script run.
	        try:
	            copy(text_content)
	        except Exception as exc:  # UI boundary: surface, don't crash
	            st.error(f"Could not copy to clipboard: {exc}")
	        else:
	            st.success("Text copied to clipboard!")
	    st.text_area("Extracted Text:", value=text_content, height=300)

	    st.text_input("Copy this text:", value=text_content, key="copy_text")



	# Run the app only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()