Spaces:

akadhim-ai
/

orangepro_app

Sleeping

App Files Files Community

orangepro_app / app.py

akadhim-ai

Update app.py

79c3b92 about 1 year ago

raw

history blame

3.88 kB

	import streamlit as st
	from PyPDF2 import PdfReader
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.vectorstores import FAISS
	from langchain.chains.question_answering import load_qa_chain
	from langchain.llms import OpenAI

	# Define a function to load PDF and perform processing
	def process_pdf(pdf_path):
	    """Index the text of a PDF for similarity search and build a QA chain.

	    The text of every page is extracted, split into overlapping chunks,
	    embedded with ``OpenAIEmbeddings`` and stored in a FAISS vector store.

	    Args:
	        pdf_path: The PDF to read; handed unchanged to ``PdfReader``. The
	            caller in this file passes a Streamlit uploaded-file object.

	    Returns:
	        A ``(document_search, chain)`` tuple: the FAISS vector store built
	        from the text chunks, and a "stuff" question-answering chain backed
	        by ``OpenAI()``.

	    Raises:
	        ValueError: If no text could be extracted from the PDF, so there is
	            nothing to index.
	    """
	    pdfreader = PdfReader(pdf_path)

	    # Pages for which extract_text() returns nothing are skipped.
	    page_texts = []
	    for page in pdfreader.pages:
	        content = page.extract_text()
	        if content:
	            page_texts.append(content)

	    # Join pages with a newline: the splitter below separates on "\n", and
	    # plain concatenation would fuse the last line of one page with the
	    # first line of the next.
	    raw_text = "\n".join(page_texts)

	    text_splitter = CharacterTextSplitter(
	        separator="\n",
	        chunk_size=800,  # measured with len(), i.e. in characters
	        chunk_overlap=100,
	        length_function=len,
	    )
	    texts = text_splitter.split_text(raw_text)

	    # Fail early with a clear message instead of letting the vector store
	    # choke on an empty list of chunks.
	    if not texts:
	        raise ValueError(
	            "No extractable text found in the PDF; it may be scanned or image-only."
	        )

	    embeddings = OpenAIEmbeddings()
	    document_search = FAISS.from_texts(texts, embeddings)

	    chain = load_qa_chain(OpenAI(), chain_type="stuff")

	    return document_search, chain

	# Function to get yes/no emoji based on answer content
	def get_answer_emoji(answer):
	    """Map a free-text answer to a yes / no / unclear emoji.

	    The answer is lower-cased and broken into alphanumeric words, and only
	    the standalone words "yes" and "no" are recognised. Plain substring
	    matching would treat "know", "not" or "another" as a "no" and "eyes"
	    as a "yes".

	    Args:
	        answer: The answer text to classify.

	    Returns:
	        "✅" if the word "yes" occurs, otherwise "❌" if the word "no"
	        occurs, otherwise "🟡".
	    """
	    # Replace every non-alphanumeric character with a space so punctuation
	    # attached to a word ("Yes," / "no.") does not hide it.
	    normalized = "".join(ch if ch.isalnum() else " " for ch in answer.lower())
	    words = set(normalized.split())

	    # "yes" keeps priority over "no" when both words appear.
	    if "yes" in words:
	        return "✅"
	    if "no" in words:
	        return "❌"
	    return "🟡"

	# Streamlit UI
	st.title("OrangePro AI - PDF and Text Analysis")

	# Upload the PDF to analyse and, optionally, a text file with one question per line
	uploaded_pdf_file = st.file_uploader("Upload a PDF file for analysis", type=["pdf"])
	uploaded_text_file = st.file_uploader("Upload a text file with questions (if available)", type=["txt"])

	if uploaded_pdf_file:
	    st.subheader("Selected PDF Content")

	    # process_pdf returns the vector store and the QA chain, not the PDF text.
	    # NOTE(review): Streamlit re-runs the script on each interaction, so the
	    # PDF is presumably re-embedded every time; consider caching the result.
	    document_search, qa_chain = process_pdf(uploaded_pdf_file)

	    # Report which file was indexed; printing the vector store itself would
	    # only show its object repr, not the document content.
	    st.write(f"PDF indexed: {uploaded_pdf_file.name}")

	    if uploaded_text_file:
	        st.warning("Questions will be extracted from the uploaded text file. Disabling question input below.")
	        # getvalue() returns the whole upload regardless of the current read
	        # position; "utf-8-sig" also drops a leading byte-order mark.
	        text_content = uploaded_text_file.getvalue().decode("utf-8-sig")
	        raw_questions = text_content.splitlines()
	    else:
	        # Allow the user to enter a list of questions
	        raw_questions = st.text_area("Enter a list of questions (one per line):").split("\n")

	    # Drop blank lines up front so the percentage below is computed over the
	    # questions that are actually asked.
	    questions = [line.strip() for line in raw_questions if line.strip()]

	    if st.button("Analyze Questions"):
	        # Perform question answering for each question
	        st.subheader("Answers:")
	        answer_summary = []
	        yes_count = 0

	        for question in questions:
	            docs = document_search.similarity_search(question)
	            answer = qa_chain.run(input_documents=docs, question=question)

	            emoji = get_answer_emoji(answer)
	            answer_summary.append([question, answer, emoji])

	            if emoji == "✅":
	                yes_count += 1

	        # Calculate and display the percentage of "yes" answers
	        total_questions = len(questions)
	        if total_questions > 0:
	            yes_percentage = (yes_count / total_questions) * 100
	        else:
	            yes_percentage = 0

	        answer_summary.append(["Percentage of 'Yes' Answers", f"{yes_percentage:.2f}%", ""])

	        # Display the summary in a table
	        st.table(answer_summary)

	# About section
	# Sidebar: product description shown as two paragraphs in an info box
	about_paragraphs = (
	    "OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases.",
	    "The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases.",
	)
	st.sidebar.title("About OrangePro AI")
	st.sidebar.info("\n\n".join(about_paragraphs))

	# Footer: credits line under the info box
	st.sidebar.text("Powered by Streamlit and Langchain")