'''
LLM scanner streamlit app

Run with: streamlit run .\app.py

Functionality:
- tokenize documents
- respond to queries
- generate new documents

Based on:
1. https://huggingface.co/spaces/llamaindex/llama_index_vector_demo
2. https://github.com/logan-markewich/llama_index_starter_pack/blob/main/streamlit_term_definition/

TODO:
- document upload (hedged sketch after initialize_index below)
- customize to other [LLMs](https://gpt-index.readthedocs.io/en/latest/reference/llm_predictor.html#llama_index.llm_predictor.LLMPredictor) (sketch in the Setup tab)
- canned questions (sketch in the Query tab)
'''
import os

import streamlit as st
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor, PromptHelper
from llama_index import StorageContext, load_index_from_storage
from langchain import OpenAI, HuggingFaceHub

import app_constants

index_fpath = "./index.json"
documents_folder = "./documents"
if "dummy" not in st.session_state: | |
st.session_state["dummy"] = "dummy" | |
#st makes this globally available for all users and sessions | |
def initialize_index(index_name, documents_folder): | |
""" | |
creates an index of the documents in the folder | |
if the index exists, skipped | |
""" | |
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 2000
    # set maximum chunk overlap
    max_chunk_overlap = 20
    # set chunk size limit
    chunk_size_limit = 600

    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-003", max_tokens=num_outputs))
    #wishlist: alternative LLMs
    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    if os.path.exists(index_name):
        # reuse the persisted index rather than re-embedding every document
        storage_context = StorageContext.from_defaults(persist_dir=index_fpath)
        doc_index = load_index_from_storage(storage_context=storage_context, service_context=service_context)
    else:
        #st.info("Updating the document index")
        documents = SimpleDirectoryReader(documents_folder).load_data()
        # chunking behavior (chunk_size_limit=600) comes from service_context above
        doc_index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
        doc_index.storage_context.persist(index_fpath)
    return doc_index
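

# Hedged sketch for the "document upload" TODO: persist a Streamlit UploadedFile
# into documents_folder so that a later initialize_index() call picks it up.
# Not yet wired into the UI; the helper name save_uploaded_file is ours, not the
# starter pack's. st.file_uploader and UploadedFile.getbuffer() are standard Streamlit.
def save_uploaded_file(uploaded_file, folder=documents_folder):
    os.makedirs(folder, exist_ok=True)
    out_path = os.path.join(folder, uploaded_file.name)
    with open(out_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return out_path
# usage, e.g. in the Setup tab:
#   uploaded = st.file_uploader("Upload a document")
#   if uploaded is not None:
#       save_uploaded_file(uploaded)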


# st.cache_data caches query responses for future callers; the leading underscore
# in _index tells Streamlit not to hash the index argument
@st.cache_data
def query_index(_index, query_text):
    query_engine = _index.as_query_engine()
    response = query_engine.query(query_text)
    return str(response)


# page layout is defined directly below
st.title("LLM scanner")
st.markdown(
    (
        "This app allows you to query documents!\n\n"
        "Powered by [Llama Index](https://gpt-index.readthedocs.io/en/latest/index.html) and supporting multiple LLMs"
    )
)

setup_tab, query_tab = st.tabs(["Setup", "Query"])

with setup_tab:
    st.subheader("LLM Setup")
    api_key = st.text_input("Enter your OpenAI API key here", type="password")
    #wishlist: llm_name = st.selectbox(
    #    "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]
    #)
    #repo_id = "google/flan-t5-xl"  # see https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for other options
    #llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0, "max_length": 64})
    #model_temperature = st.slider(
    #    "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
    #)
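    # Hedged sketch for the "customize to other LLMs" TODO, building on the
    # wishlist lines above: LLMPredictor wraps any langchain LLM, so a Hugging Face
    # model could replace OpenAI. Untested here, so left commented out:
    #llm_predictor = LLMPredictor(llm=HuggingFaceHub(repo_id=repo_id,
    #                                                model_kwargs={"temperature": 0, "max_length": 64}))
    #service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)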

with query_tab:
    st.subheader("Query Tab")
    st.write("Enter a query about the included documents. Find [documentation here](https://huggingface.co/spaces/agutfraind/llmscanner)")
    doc_index = None
    if api_key:
        os.environ['OPENAI_API_KEY'] = api_key
        doc_index = initialize_index(index_fpath, documents_folder)
    if doc_index is None:
        st.warning("Please enter your api key first.")
    text = st.text_input("Query text:", value="What did the author do growing up?")
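    # Hedged sketch for the "canned questions" TODO: offer preset queries via a
    # selectbox. CANNED_QUESTIONS is a hypothetical name; it is not known to exist
    # in the current app_constants module:
    #text = st.selectbox("Or pick a canned question:", app_constants.CANNED_QUESTIONS)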
    if st.button("Run Query") and text is not None and doc_index is not None:
        response = query_index(doc_index, text)
        st.markdown(response)

        llm_col, embed_col = st.columns(2)
        with llm_col:
            st.markdown(f"LLM Tokens Used: {doc_index.service_context.llm_predictor._last_token_usage}")
        with embed_col:
            st.markdown(f"Embedding Tokens Used: {doc_index.service_context.embed_model._last_token_usage}")