# Spaces: Sleeping  (hosting-page status banner captured with the source; not part of the program)
import re

import streamlit as st
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
# Define a function to load PDF and perform processing
def process_pdf(pdf_path):
    """Build a FAISS similarity index and a question-answering chain from a PDF.

    The text of every page is extracted, concatenated, split on newlines into
    chunks of at most 800 characters (100 characters of overlap), embedded
    with ``OpenAIEmbeddings`` and indexed with FAISS.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts as its source; the script in
            this file passes the object returned by ``st.file_uploader``.

    Returns:
        A ``(document_search, chain)`` tuple: the FAISS store built from the
        text chunks and the "stuff" QA chain created by ``load_qa_chain``.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to index.
    """
    pdfreader = PdfReader(pdf_path)

    # Pages for which extract_text() returns nothing are skipped; join once
    # instead of growing a string inside the loop.
    page_texts = (page.extract_text() for page in pdfreader.pages)
    raw_text = "".join(text for text in page_texts if text)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)

    # An empty chunk list would otherwise fail inside FAISS.from_texts with an
    # unhelpful low-level error, so report the real problem here.
    if not texts:
        raise ValueError(
            "No extractable text was found in the PDF; it may be empty or "
            "contain only scanned images."
        )

    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return document_search, chain
# Function to get yes/no emoji based on answer content
def get_answer_emoji(answer):
    """Return a marker string classifying *answer* as yes, no, or neither.

    Matching is case-insensitive and on whole words only, so words that merely
    contain the letters (e.g. "not", "know", "yesterday") do not count.
    "yes" takes precedence when both words appear.

    Args:
        answer: The answer text to classify.

    Returns:
        The "yes" marker, the "no" marker, or the neutral marker.
    """
    # NOTE(review): the three marker literals look like mis-encoded emoji.
    # They are kept byte-for-byte because the calling script compares against
    # the "yes" marker; fix the encoding in all places at once.
    text = answer.lower()
    if re.search(r"\byes\b", text):
        return "β "
    if re.search(r"\bno\b", text):
        return "β"
    return "π‘"
# Streamlit UI
st.title("OrangePro AI - PDF and Text Analysis")

# Upload a PDF file and, optionally, a text file of questions
uploaded_pdf_file = st.file_uploader("Upload a PDF file for analysis", type=["pdf"])
uploaded_text_file = st.file_uploader("Upload a text file with questions (if available)", type=["txt"])

# Both stay None until a PDF has been processed, so the analysis step can
# tell "no PDF yet" apart from "ready" instead of hitting undefined names.
document_search = None
qa_chain = None
if uploaded_pdf_file:
    st.subheader("Selected PDF Content")
    # NOTE(review): presumably this re-processes (and re-embeds) the PDF on
    # every script run; consider caching the result per uploaded file.
    document_search, qa_chain = process_pdf(uploaded_pdf_file)
    st.write("PDF Content:")
    # NOTE(review): this renders the search index object returned by
    # process_pdf, not the PDF's text -- confirm that is what is wanted.
    st.text(document_search)

if uploaded_text_file:
    st.warning("Questions will be extracted from the uploaded text file. Disabling question input below.")
    # getvalue() returns the full buffer regardless of the current read
    # position; "utf-8-sig" also strips a leading byte-order mark if present.
    question_text = uploaded_text_file.getvalue().decode("utf-8-sig")
else:
    # Allow the user to enter a list of questions
    question_text = st.text_area("Enter a list of questions (one per line):")

# Drop blank lines up front so they are neither sent to the model nor counted
# in the denominator of the "yes" percentage.
questions = [line.strip() for line in question_text.splitlines() if line.strip()]

if st.button("Analyze Questions"):
    if document_search is None:
        st.error("Please upload a PDF file before analyzing questions.")
    else:
        # Perform question answering for each question
        st.subheader("Answers:")
        # Ask the helper for its "yes" marker rather than duplicating the
        # literal here, so the two cannot drift apart.
        yes_marker = get_answer_emoji("yes")
        answer_summary = []
        yes_count = 0
        for question in questions:
            docs = document_search.similarity_search(question)
            answer = qa_chain.run(input_documents=docs, question=question)
            emoji = get_answer_emoji(answer)
            answer_summary.append([question, answer, emoji])
            if emoji == yes_marker:
                yes_count += 1

        # Calculate and display the percentage of "yes" answers
        total_questions = len(questions)
        yes_percentage = (yes_count / total_questions) * 100 if total_questions else 0
        answer_summary.append(["Percentage of 'Yes' Answers", f"{yes_percentage:.2f}%", ""])

        # Display the summary in a table
        st.table(answer_summary)

# About section
st.sidebar.title("About OrangePro AI")
st.sidebar.info(
    "OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases."
    "\n\n"
    "The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases."
)

# Footer
st.sidebar.text("Powered by Streamlit and Langchain")