Spaces:

taaha3244
/

Lex

Runtime error

App Files Files Community

Lex / app.py

taaha3244

Upload app.py

8999dd1 verified 6 months ago

raw

history blame

3.92 kB

	import os
	from dotenv import load_dotenv
	import tempfile
	import streamlit as st
	from langchain_community.document_loaders import PyPDFLoader

	from main import summarize_pdf_document
	from main import retrieve_documents
	from main import embed_document_data
	from main import is_document_embedded




	# Load environment variables from a .env file (if one is present) before the
	# app runs; process_documents later reads OPENAI_API_KEY via os.getenv.
	load_dotenv()

	def main():
	    """Build the page: sidebar PDF upload, summaries, embedding and Q&A.

	    When nothing has been uploaded only the Q&A section is rendered (with an
	    empty file list). Otherwise the uploads are saved to disk, the summary
	    section is shown, and an extra button lets the user embed the documents
	    before the Q&A section is rendered.
	    """
	    st.sidebar.title("PDF Management")
	    pdf_uploads = st.sidebar.file_uploader(
	        "Upload PDF files",
	        type=["pdf"],
	        accept_multiple_files=True,
	    )

	    if not pdf_uploads:
	        # The Q&A section is shown regardless of whether files were uploaded.
	        display_qna_section([])
	        return

	    files_info = save_uploaded_files(pdf_uploads)
	    process_documents(files_info)
	    if st.button('Add Uploaded Documents in Q nd A'):
	        embed_documents(files_info)

	    # The Q&A section is shown regardless of whether files were uploaded.
	    display_qna_section(files_info)


	def save_uploaded_files(uploaded_files):
	    """Save uploaded files to the temp directory and return their locations.

	    Streamlit re-executes the script on every user interaction, so this
	    function is called again and again for the same uploads. Writing a fresh
	    ``delete=False`` temporary file on each call would leave an ever-growing
	    pile of copies behind. Instead, each upload is stored under a stable path
	    derived from the SHA-256 of its content and reused on later calls.

	    Args:
	        uploaded_files: Iterable of objects exposing ``getvalue()`` (the file
	            content as bytes) and ``name`` (the original file name).

	    Returns:
	        A list of ``(saved_path, original_name)`` tuples, one per upload, in
	        input order. Uploads with identical content share one ``saved_path``.

	    Note:
	        Saved files are not deleted by this function; the number of files is
	        bounded by the number of distinct uploads rather than by the number
	        of script re-runs.
	    """
	    import hashlib  # local import: only this function needs it

	    temp_dir = tempfile.gettempdir()
	    files_info = []
	    for uploaded_file in uploaded_files:
	        payload = uploaded_file.getvalue()
	        digest = hashlib.sha256(payload).hexdigest()
	        stable_path = os.path.join(temp_dir, f"lex_upload_{digest}.pdf")

	        # Reuse an existing copy only if its bytes really match the upload;
	        # a missing, unreadable or different file is simply rewritten.
	        try:
	            with open(stable_path, 'rb') as existing:
	                already_saved = existing.read() == payload
	        except OSError:
	            already_saved = False

	        if not already_saved:
	            # Write to a uniquely named staging file first, then move it into
	            # place atomically so readers never see a half-written PDF.
	            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=temp_dir) as tmpfile:
	                tmpfile.write(payload)
	            os.replace(tmpfile.name, stable_path)

	        # Keep both the on-disk path and the original name for display.
	        files_info.append((stable_path, uploaded_file.name))
	    return files_info


	def process_documents(files_info):
	    """Render the summary section and summarize each upload on request.

	    Shows a "Summarize Uploaded Documents" button. When it is pressed, every
	    document is passed to ``summarize_pdf_document`` together with the
	    ``OPENAI_API_KEY`` environment variable, and the result is shown in a
	    text area. A failure for one document is reported with ``st.error`` and
	    does not stop the remaining documents from being processed.

	    Args:
	        files_info: Iterable of ``(temp_path, original_name)`` pairs.
	    """
	    st.header("Document Summaries")
	    if not st.button('Summarize Uploaded Documents'):
	        return

	    # The key does not change between documents, so look it up once.
	    api_key = os.getenv('OPENAI_API_KEY')

	    for position, (temp_path, original_name) in enumerate(files_info):
	        with st.container():  # Group each document's heading and summary
	            st.write(f"Summary for {original_name}:")
	            try:
	                summary = summarize_pdf_document(temp_path, api_key)
	                # Widget keys must be unique. Two uploads can share a file
	                # name, so the position is part of the key to avoid a
	                # duplicate-key error for the second one.
	                st.text_area(
	                    "",
	                    value=summary,
	                    height=200,
	                    key=f"summary_{position}_{original_name}",
	                )
	            except Exception as e:
	                st.error(f"Failed to summarize {original_name}: {str(e)}")


	def embed_documents(files_info):
	    """Embed every document that has not been embedded yet.

	    For each ``(temp_path, original_name)`` pair the PDF is loaded from the
	    temporary path, the ``source`` metadata of each loaded document is set to
	    the original file name, and the documents are handed to
	    ``embed_document_data``. Outcomes are reported in the UI: success, an
	    error message on failure, or an info note when the file was already
	    embedded.
	    """
	    for pdf_path, display_name in files_info:
	        # Skip files that are already embedded.
	        if is_document_embedded(display_name):
	            st.info(f"{display_name} is already embedded.")
	            continue

	        try:
	            loaded_docs = PyPDFLoader(pdf_path).load()

	            # Report the user's file name as the source, not the temp path.
	            for entry in loaded_docs:
	                entry.metadata['source'] = display_name

	            embed_document_data(loaded_docs)
	            st.success(f"Embedded {display_name}")
	        except Exception as err:
	            st.error(f"Failed to embed {display_name}: {str(err)}")


	def display_qna_section(files_info):
	    """Render the question form and show the answer to a submitted question.

	    An empty submission produces an error message instead of a query.
	    ``files_info`` is accepted for the caller's convenience but is not used
	    by this function.
	    """
	    st.header("Question and Answer")
	    with st.form("qa_form"):
	        question = st.text_input("Enter your question here:")
	        submitted = st.form_submit_button('Get Answer')

	    # Nothing to do until the form has been submitted.
	    if not submitted:
	        return

	    if question:
	        st.write(handle_query(question))
	    else:
	        st.error("Please enter a question to get an answer.")

	def handle_query(query):
	    """Return whatever ``retrieve_documents`` produces for ``query``."""
	    return retrieve_documents(query)

	# Run the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()