'''
LLM scanner Streamlit app

Run with:
    streamlit run app.py

Functionality
- index documents into a vector store
- answer queries about the indexed documents
- generate new documents

Based on:
1. https://huggingface.co/spaces/llamaindex/llama_index_vector_demo
2. https://github.com/logan-markewich/llama_index_starter_pack/blob/main/streamlit_term_definition/


TODO:
- document upload
- customize to other [LLMs](https://gpt-index.readthedocs.io/en/latest/reference/llm_predictor.html#llama_index.llm_predictor.LLMPredictor)
- canned questions

'''

import os
import streamlit as st
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor, PromptHelper
from llama_index import StorageContext, load_index_from_storage

from langchain import OpenAI, HuggingFaceHub

import app_constants
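# Note: the imports above target the legacy llama_index / langchain APIs
# (GPTVectorStoreIndex, ServiceContext, LLMPredictor); newer releases have moved
# or removed these names, so exact version pins are an assumption, not verified here.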

index_fpath = "./index.json"
documents_folder = "./documents"

if "dummy" not in st.session_state:
    st.session_state["dummy"] = "dummy"

@st.cache_resource  #cached resource is shared globally across all users and sessions
def initialize_index(index_name, documents_folder):
    """
    Creates an index of the documents in the folder.
    If a persisted index already exists, it is loaded instead of rebuilt.
    """
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 2000
    # set maximum chunk overlap
    max_chunk_overlap = 20
    # set chunk size limit
    chunk_size_limit = 600 

    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-003", max_tokens=num_outputs))
    
    #wishlist: alternatives
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
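    # Hedged sketch for the "alternatives" wishlist above: a HuggingFace-hosted model
    # could be swapped in via the already-imported HuggingFaceHub wrapper; the repo_id
    # and model_kwargs below are illustrative assumptions, not tested defaults.
    #hf_llm = HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature": 0.5, "max_length": num_outputs})
    #llm_predictor = LLMPredictor(llm=hf_llm)
    #service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)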
    if os.path.exists(index_name):
        storage_context = StorageContext.from_defaults(persist_dir=index_name)
        doc_index = load_index_from_storage(storage_context=storage_context, service_context=service_context)
    else:
        #st.info("Updating the document index")
        prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
 
        documents = SimpleDirectoryReader(documents_folder).load_data()
        doc_index = GPTVectorStoreIndex.from_documents(
            documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper,
            chunk_size_limit=chunk_size_limit, service_context=service_context
        )
        doc_index.storage_context.persist(index_name)

    return doc_index

#st.cache_data memoizes results, so repeated queries are served from cache
@st.cache_data(max_entries=200, persist=True)
def query_index(_index, query_text):
    #the leading underscore in _index tells Streamlit not to hash this argument
    query_engine = _index.as_query_engine()
    response = query_engine.query(query_text)
    #response = _index.query(query_text)
    return str(response)
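
# A possible extension of query_index (untested sketch): the legacy response object
# also exposes the retrieved passages, which could be surfaced for attribution, e.g.
#   sources = [node.node.get_text() for node in response.source_nodes]
# before the response is cast to str.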


#page layout is written directly here
st.title("LLM scanner")
st.markdown(
    (
        "This app allows you to query documents!\n\n"
        "Powered by [Llama Index](https://gpt-index.readthedocs.io/en/latest/index.html) and supporting multiple LLMs"
    )
)

setup_tab, query_tab = st.tabs(
    ["Setup", "Query"]
)

with setup_tab:
    st.subheader("LLM Setup")
    api_key = st.text_input("Enter your OpenAI API key here", type="password")
    #wishlist llm_name = st.selectbox(
    #    "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]
    #)
    #repo_id = "google/flan-t5-xl" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options
    #llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0, "max_length":64})
    
    #model_temperature = st.slider(
    #    "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
    #)
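
    # Possible wiring for the wishlist above (untested sketch; the extra parameters
    # are assumptions, not part of the current initialize_index signature):
    #doc_index = initialize_index(index_fpath, documents_folder,
    #                             llm_name=llm_name, temperature=model_temperature)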


with query_tab:
    st.subheader("Query Tab")
    st.write("Enter a query about the included documents. Find [documentation here](https://huggingface.co/spaces/agutfraind/llmscanner)")

    doc_index = None
    #api_key = st.text_input("Enter your OpenAI API key here:", type="password")
    if api_key:
        os.environ['OPENAI_API_KEY'] = api_key
        doc_index = initialize_index(index_fpath, documents_folder)

    if doc_index is None:
        st.warning("Please enter your API key first.")

    text = st.text_input("Query text:", value="What did the author do growing up?")

    if st.button("Run Query") and text and doc_index is not None:
        response = query_index(doc_index, text)
        st.markdown(response)
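        # Note: the token counts below rely on _last_token_usage, a private
        # attribute of the legacy llama_index classes; newer releases may not expose it.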
        
        llm_col, embed_col = st.columns(2)
        with llm_col:
            st.markdown(f"LLM Tokens Used: {doc_index.service_context.llm_predictor._last_token_usage}")
        
        with embed_col:
            st.markdown(f"Embedding Tokens Used: {doc_index.service_context.embed_model._last_token_usage}")