'''
LLM scanner Streamlit app

Run with:
    streamlit run app.py

Functionality
- index documents into a vector store
- answer queries about the indexed documents
- generate new documents

Based on:
1. https://huggingface.co/spaces/llamaindex/llama_index_vector_demo
2. https://github.com/logan-markewich/llama_index_starter_pack/blob/main/streamlit_term_definition/


TODO:
- document upload
- customize to other [LLMs](https://gpt-index.readthedocs.io/en/latest/reference/llm_predictor.html#llama_index.llm_predictor.LLMPredictor)
- canned questions

'''

import os
import streamlit as st
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor, PromptHelper
from llama_index import StorageContext, load_index_from_storage

from langchain import OpenAI, HuggingFaceHub

import app_constants
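# Note: the imports above target the legacy llama_index / langchain APIs
# (GPTVectorStoreIndex, ServiceContext, LLMPredictor); newer releases have moved
# or removed these names, so exact version pins are an assumption, not verified here.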

index_fpath = "./index.json"
documents_folder = "./documents"

if "dummy" not in st.session_state:
    st.session_state["dummy"] = "dummy"

@st.cache_resource  #cached resource is shared globally across all users and sessions
def initialize_index(index_name, documents_folder):
    """
    Creates an index of the documents in the folder.
    If a persisted index already exists, it is loaded instead of rebuilt.
    """
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 2000
    # set maximum chunk overlap
    max_chunk_overlap = 20
    # set chunk size limit
    chunk_size_limit = 600 

    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-003", max_tokens=num_outputs))
    
    #wishlist: alternatives
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
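    # Hedged sketch for the "alternatives" wishlist above: a HuggingFace-hosted model
    # could be swapped in via the already-imported HuggingFaceHub wrapper; the repo_id
    # and model_kwargs below are illustrative assumptions, not tested defaults.
    #hf_llm = HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature": 0.5, "max_length": num_outputs})
    #llm_predictor = LLMPredictor(llm=hf_llm)
    #service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)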
    if os.path.exists(index_name):
        storage_context = StorageContext.from_defaults(persist_dir=index_name)
        doc_index = load_index_from_storage(storage_context=storage_context, service_context=service_context)
    else:
        #st.info("Updating the document index")
        prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
 
        documents = SimpleDirectoryReader(documents_folder).load_data()
        doc_index = GPTVectorStoreIndex.from_documents(
            documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper,
            chunk_size_limit=chunk_size_limit, service_context=service_context
        )
        doc_index.storage_context.persist(index_name)

    return doc_index

#st.cache_data memoizes results, so repeated queries are served from cache
@st.cache_data(max_entries=200, persist=True)
def query_index(_index, query_text):
    #the leading underscore in _index tells Streamlit not to hash this argument
    query_engine = _index.as_query_engine()
    response = query_engine.query(query_text)
    #response = _index.query(query_text)
    return str(response)
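
# A possible extension of query_index (untested sketch): the legacy response object
# also exposes the retrieved passages, which could be surfaced for attribution, e.g.
#   sources = [node.node.get_text() for node in response.source_nodes]
# before the response is cast to str.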


#page layout is written directly here
st.title("LLM scanner")
st.markdown(
    (
        "This app allows you to query documents!\n\n"
        "Powered by [Llama Index](https://gpt-index.readthedocs.io/en/latest/index.html) and supporting multiple LLMs"
    )
)

setup_tab, query_tab = st.tabs(
    ["Setup", "Query"]
)

with setup_tab:
    st.subheader("LLM Setup")
    api_key = st.text_input("Enter your OpenAI API key here", type="password")
    #wishlist llm_name = st.selectbox(
    #    "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]
    #)
    #repo_id = "google/flan-t5-xl" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options
    #llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0, "max_length":64})
    
    #model_temperature = st.slider(
    #    "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
    #)
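
    # Possible wiring for the wishlist above (untested sketch; the extra parameters
    # are assumptions, not part of the current initialize_index signature):
    #doc_index = initialize_index(index_fpath, documents_folder,
    #                             llm_name=llm_name, temperature=model_temperature)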


with query_tab:
    st.subheader("Query Tab")
    st.write("Enter a query about the included documents. Find [documentation here](https://huggingface.co/spaces/agutfraind/llmscanner)")

    doc_index = None
    #api_key = st.text_input("Enter your OpenAI API key here:", type="password")
    if api_key:
        os.environ['OPENAI_API_KEY'] = api_key
        doc_index = initialize_index(index_fpath, documents_folder)

    if doc_index is None:
        st.warning("Please enter your API key first.")

    text = st.text_input("Query text:", value="What did the author do growing up?")

    if st.button("Run Query") and text and doc_index is not None:
        response = query_index(doc_index, text)
        st.markdown(response)
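        # Note: the token counts below rely on _last_token_usage, a private
        # attribute of the legacy llama_index classes; newer releases may not expose it.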
        
        llm_col, embed_col = st.columns(2)
        with llm_col:
            st.markdown(f"LLM Tokens Used: {doc_index.service_context.llm_predictor._last_token_usage}")
        
        with embed_col:
            st.markdown(f"Embedding Tokens Used: {doc_index.service_context.embed_model._last_token_usage}")