# Source: Hugging Face Space "mohcineelharras/llama-index-docs-spaces" (status: Sleeping)
# File size: 10,321 Bytes
# Revision: e6e7a99

# --------------------------------libraries-----------------------------------

import streamlit as st
#import torch
import os
import logging
import sys
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index.embeddings import InstructorEmbedding
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from tqdm.notebook import tqdm
from dotenv import load_dotenv

# --------------------------------env variables-----------------------------------

# Read the local .env file first so the lookups below can see its values.
load_dotenv(dotenv_path=".env")

# Each of these is None when the corresponding variable is not set.
no_proxy = os.environ.get("no_proxy")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")

# --------------------------------cache LLM-----------------------------------

# Send log records to stdout at INFO level.
# basicConfig() installs a stdout StreamHandler on the root logger by itself
# and is a no-op once the root logger already has handlers. Do not also call
# addHandler(StreamHandler(sys.stdout)) here: that prints every record twice
# and stacks one more handler each time Streamlit re-executes this script.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Debug callback handler from llama_index; print_trace_on_end=True presumably
# makes it print the recorded event trace when a traced operation ends.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
# NOTE(review): callback_manager is not passed to any ServiceContext or LLM in
# this file, so the debug handler appears to be unused -- confirm before relying on it.
callback_manager = CallbackManager([llama_debug])
# LLM
@st.cache_resource
def load_llm_model():
    """Load the local GGUF model through llama.cpp.

    The function is wrapped in ``st.cache_resource``, so its return value is
    reused across script reruns instead of reloading the model each time.
    Note that a ``None`` result is cached as well: after adding the missing
    model the cache has to be cleared (or the app restarted).

    Returns:
        The ``LlamaCPP`` instance, or ``None`` when the ``models`` directory
        or the model file is missing (an error is shown in the app).
    """
    model_dir = "models"
    model_path = "models/dolphin-2.1-mistral-7b.Q4_K_S.gguf"

    if not os.path.exists(model_dir):
        st.error("models directory does not exist. Please download and copy paste a model in folder models.")
        # exist_ok: the directory may appear between the check and the creation.
        os.makedirs(model_dir, exist_ok=True)
        return None

    # The directory alone is not enough: without the file itself the LlamaCPP
    # constructor would fail and take the whole page down with a traceback.
    if not os.path.isfile(model_path):
        st.error(f"Model file {model_path} does not exist. Please download and copy paste it in folder models.")
        return None

    llm = LlamaCPP(
        # Alternative (download instead of a local file):
        # model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
        model_path=model_path,
        temperature=0.0,
        max_new_tokens=100,
        context_window=1024,
        generate_kwargs={},
        # Forwarded to llama.cpp; presumably the number of layers offloaded to the GPU.
        model_kwargs={"n_gpu_layers": 20},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm

# Module-level LLM shared by all tabs; may be None (see load_llm_model).
llm = load_llm_model()

# --------------------------------cache Embedding model-----------------------------------

@st.cache_resource
def load_emb_model():
    """Index the files under ``data`` and return a query engine over them.

    Decorated with ``st.cache_resource``, so the returned object is reused
    across script reruns. Uses the module-level ``llm``.

    Returns:
        A query engine built from a ``VectorStoreIndex``, or ``None`` when the
        ``data`` directory did not exist (it is created and an error is shown).
    """
    data_dir_missing = not os.path.exists("data")
    if data_dir_missing:
        st.error("Data directory does not exist. Please upload the data.")
        os.makedirs("data")
        return None

    # Embedding model is read from a local folder
    # (original hint kept from the source: model_name="hkunlp/instructor-base").
    instructor_embedder = InstructorEmbedding("models/hkunlp_instructor-base")
    context = ServiceContext.from_defaults(embed_model=instructor_embedder, llm=llm)

    corpus = SimpleDirectoryReader("data").load_data()
    print(f"Number of documents: {len(corpus)}")

    vector_index = VectorStoreIndex.from_documents(
        corpus,
        service_context=context,
        show_progress=True,
    )
    corpus_query_engine = vector_index.as_query_engine()
    return corpus_query_engine

# Query engine over the "data" folder, used by the RAG tab; may be None (see load_emb_model).
# NOTE(review): the single-document tab later rebinds this same name.
query_engine = load_emb_model()

# ------------------------------------layout----------------------------------------

# Sidebar: API server field, memory reset button and static information.
with st.sidebar:
    api_server_info = st.text_input("Local LLM API server", OPENAI_API_BASE, key="openai_api_base")
    st.title("🤖 Llama Index 📚")

    # Resetting the conversation memory kept in session state.
    clear_requested = st.button('Clear Memory')
    if clear_requested:
        st.session_state.memory = ""

    for note in (
        "Local LLM API server in this demo is useles, we are loading local model using llama_index integration of llama cpp",
        "🚀 This app allows you to chat with local LLM using api server or loaded in cache",
    ):
        st.write(note)

    st.subheader("💻 System Requirements: ")
    for requirement in (
        "- CPU: the faster the better ",
        "- RAM: 16 GB or higher",
        "- GPU: optional but very useful for Cuda acceleration",
    ):
        st.markdown(requirement)

    st.subheader("Developer Information:")
    st.write("This app is developed and maintained by **@mohcineelharras**")

# The three tabs of the app, filled in by the sections below.
tab_titles = ["LLM only", "LLM RAG QA with database", "One single document Q&A"]
tab1, tab2, tab3 = st.tabs(tab_titles)

# -----------------------------------LLM only--------------------------------------------- 
# Conversation memory is kept in session state so it survives script reruns.
if 'memory' not in st.session_state:
    st.session_state.memory = ""

with tab1:
    st.title("💬 LLM only")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="Who is Lionel Messi",
    )
    # Prompt skeleton sent to the model; {prompt} receives memory + question.
    template = (
        "system\n"
        "You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to. "
        "Do not provide information that is not contained in the documents. "
        "If a question is asked about content not in the documents, respond with 'I do not have that information.' "
        "Always respond in the same language as the question was asked. Be concise.\n"
        "user\n"
        "{prompt}\n"
        "assistant\n"
    )
    if prompt and llm is None:
        # load_llm_model() returns None when no model is available; calling
        # llm.complete() would otherwise fail with an AttributeError.
        st.error("The local LLM is not loaded, so the question cannot be answered.")
    elif prompt:
        # Previous exchange (if any) is prepended to the new question.
        contextual_prompt = st.session_state.memory + "\n" + prompt
        formatted_prompt = template.format(prompt=contextual_prompt)

        response = llm.complete(formatted_prompt, max_tokens=100, temperature=0, top_p=0.95, top_k=10)
        # Work with the generated text rather than the response object itself.
        text_response = response.text
        st.write("LLM's Response:\n", text_response)

        # NOTE(review): memory embeds the whole previous memory, so it grows
        # with every answered prompt -- confirm this is intended.
        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
        # Explicit encoding: model output may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open("short_memory.txt", 'w', encoding="utf-8") as file:
            file.write(st.session_state.memory)

# -----------------------------------LLM Q&A-------------------------------------------------    

with tab2:
    st.title("💬 LLM RAG QA with database")
    st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/blob/main/data")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="How does the blockchain work ?",
    )
    if prompt and query_engine is None:
        # load_emb_model() returns None when the data directory was missing.
        st.error("The document index is not available, so the question cannot be answered.")
    elif prompt:
        response = query_engine.query(prompt)
        st.write("Your prompt: ", prompt)
        # f-string instead of "+": does not raise if the response text is None.
        st.write(f"LLM's Response:\n{response.response}")
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes, start=1):
                # Build a fresh dict for display instead of adding "Text" to
                # the node's own metadata, which would mutate the retrieved node.
                source_info = {**node.node.metadata, "Text": node.node.text}
                st.write(f"Source n°{i}", source_info)

# -----------------------------------Upload File Q&A-----------------------------------------

def load_emb_uploaded_document(filename):
    """Build a query engine over one file.

    Args:
        filename: Path of the file to load and index.

    Returns:
        A query engine for the indexed file, or ``None`` when the ``init``
        flag is not present in ``st.session_state``.
    """
    # Guard against running before the app has set its 'init' flag.
    if 'init' not in st.session_state:
        return None

    embedder = InstructorEmbedding("models/hkunlp_instructor-base")
    context = ServiceContext.from_defaults(embed_model=embedder, llm=llm)
    loaded_docs = SimpleDirectoryReader(input_files=[filename]).load_data()
    single_doc_index = VectorStoreIndex.from_documents(
        loaded_docs,
        service_context=context,
        show_progress=True,
    )
    return single_doc_index.as_query_engine()

with tab3:
    st.title("📝 One single document Q&A with Llama Index using local open llms")
    uploaded_file = st.file_uploader("Upload an File", type=("txt", "csv", "md","pdf"))
    question = st.text_input(
        "Ask something about the files",
        placeholder="Can you give me a short summary?",
        disabled=not uploaded_file,
    )

    # Flag checked by load_emb_uploaded_document before it builds an index.
    if 'init' not in st.session_state:
        st.session_state.init = True

    if uploaded_file:
        # Create the scratch folder silently; the previous error text about
        # the "models" folder did not apply here and nothing was actually wrong.
        os.makedirs("draft_docs", exist_ok=True)

        # The file name comes from the browser: keep only its last component
        # so the upload cannot be written outside draft_docs.
        draft_path = os.path.join("draft_docs", os.path.basename(uploaded_file.name))
        with open(draft_path, "wb") as f:
            # getvalue() returns the full content regardless of the current
            # read position, unlike read() on an already-consumed upload.
            f.write(uploaded_file.getvalue())

        query_engine = load_emb_uploaded_document(draft_path)
        st.write("File ",uploaded_file.name, "was loaded successfully")

    # The sidebar "Local LLM API server" value is not used for answering, so
    # it no longer gates this branch (an empty field used to disable Q&A).
    if uploaded_file and question:
        # Same text as before, spelled out explicitly (including the
        # whitespace that the original triple-quoted literal contained).
        prompt = (
            "Based on the context presented. Respond to the question below to the best of your ability.\n"
            "        \n\n"
            f"{question}"
        )
        response = query_engine.query(prompt)
        st.write("### Answer")
        st.write(response.response)
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes, start=1):
                # Fresh dict for display; the node's own metadata is left untouched.
                source_info = {**node.node.metadata, "Text": node.node.text}
                st.write(f"Source n°{i}", source_info)

# Page footer: repository / LinkedIn / portfolio badges and the copyright line.
# Rendered as raw HTML, hence unsafe_allow_html=True.
footer_html = """
<div style="text-align: center; margin-top: 20px;">
    <a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">
        <img src="https://img.shields.io/badge/Repository-333?logo=github&style=for-the-badge" alt="Repository" style="vertical-align: middle;">
    </a>
    <a href="https://www.linkedin.com/in/mohcine-el-harras" target="_blank" style="margin: 10px; display: inline-block;">
        <img src="https://img.shields.io/badge/-LinkedIn-0077B5?style=for-the-badge&logo=linkedin" alt="LinkedIn" style="vertical-align: middle;">
    </a>
    <a href="https://mohcineelharras.github.io" target="_blank" style="margin: 10px; display: inline-block;">
        <img src="https://img.shields.io/badge/Visit-Portfolio-9cf?style=for-the-badge" alt="GitHub" style="vertical-align: middle;">
    </a>
</div>
<div style="text-align: center; margin-top: 20px; color: #666; font-size: 0.85em;">
    © 2023 Mohcine EL HARRAS
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)


# -----------------------------------end-----------------------------------------