# Streamlit + LangChain app: PDF question answering with yes/no quality scoring.
import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
def process_pdf(pdf_path):
    """Extract text from a PDF and build a question-answering pipeline over it.

    Parameters
    ----------
    pdf_path:
        A file path or binary file-like object accepted by ``PdfReader``
        (Streamlit's ``UploadedFile`` works directly).

    Returns
    -------
    tuple
        ``(document_search, chain)`` where ``document_search`` is a FAISS
        vector store over the chunked PDF text and ``chain`` is a "stuff"
        QA chain backed by the OpenAI LLM.
    """
    pdf_reader = PdfReader(pdf_path)

    # Concatenate text from every page; pages with no extractable text
    # (e.g. scanned images) yield None/'' and are skipped.
    raw_text = ''.join(
        page.extract_text() or '' for page in pdf_reader.pages
    )

    # Split into overlapping chunks so each piece fits the embedding model's
    # context comfortably; overlap preserves continuity across chunk edges.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)

    # Embed the chunks and index them for similarity search.
    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)

    # "stuff" chain: retrieved chunks are stuffed verbatim into one prompt.
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return document_search, chain
# Function to get yes/no emoji based on answer content
def get_answer_emoji(answer):
    """Map an answer string to a status emoji.

    Returns "✅" when the answer contains "yes", "❌" when it contains "no",
    and "🟡" otherwise.  Matching is case-insensitive and substring-based
    ("yes" wins when both appear; e.g. "nothing" would match "no").
    """
    normalized = answer.lower()
    if "yes" in normalized:
        return "✅"
    if "no" in normalized:
        return "❌"
    return "🟡"
# ------------------------------- Streamlit UI -------------------------------
st.title("OrangePro.AI LLM Output Testing")

# Inputs: the PDF to analyze and, optionally, a text file of questions.
uploaded_pdf_file = st.file_uploader("Upload a PDF file for analysis", type=["pdf"])
uploaded_text_file = st.file_uploader("Upload a text file with questions (if available)", type=["txt"])

if uploaded_pdf_file:
    st.subheader("Selected PDF Content")
    # Build the vector index and QA chain from the uploaded PDF.
    # NOTE(review): `document_search` is a FAISS store, not raw text, so
    # st.text() below renders its repr — kept from the original; consider
    # returning/showing the extracted text instead.
    document_search, qa_chain = process_pdf(uploaded_pdf_file)
    st.write("PDF Content:")
    st.text(document_search)

    if uploaded_text_file:
        st.warning("Questions will be extracted from the uploaded text file. Disabling question input below.")
        text_content = uploaded_text_file.read().decode('utf-8')  # Decode bytes to string
        questions = text_content.splitlines()
    else:
        # Allow the user to enter a list of questions
        questions = st.text_area("Enter a list of questions (one per line):").split('\n')

    if st.button("Analyze Questions"):
        # Perform question answering for each question
        st.subheader("Answers:")
        answer_summary = []
        yes_count = 0

        # Drop blank lines up front so the "yes" percentage is computed over
        # questions actually asked (the original counted blanks in the
        # denominator while skipping them in the loop).
        asked = [q for q in questions if q.strip()]
        total_questions = len(asked)

        for question in asked:
            # Retrieve the most relevant chunks, then answer from them.
            docs = document_search.similarity_search(question)
            answer = qa_chain.run(input_documents=docs, question=question)
            emoji = get_answer_emoji(answer)
            answer_summary.append([question, answer, emoji])
            if emoji == "✅":
                yes_count += 1

        # Calculate and display the percentage of "yes" answers.
        if total_questions > 0:
            yes_percentage = (yes_count / total_questions) * 100
        else:
            yes_percentage = 0
        answer_summary.append(["Output Quality (✅/Total Questions)", f"{yes_percentage:.2f}%", ""])

        # Display the summary in a table
        st.table(answer_summary)
# Sidebar: static "About" copy plus a footer credit.
_ABOUT_TEXT = (
    "OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases."
    "\n\n"
    "The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases."
)

st.sidebar.title("About OrangePro AI")
st.sidebar.info(_ABOUT_TEXT)
st.sidebar.text("Powered by Streamlit and Langchain")
|