AlexanderKazakov committed
Commit · a37b98a
Parent(s): 0ae385b

fix layout

- gradio_app/app.py +14 -157
- gradio_app/backend/query_llm.py +2 -2
gradio_app/app.py
CHANGED
@@ -1,48 +1,13 @@
-"""
-Credit to Derek Thomas, [email protected]
-"""
-
-# import subprocess
-# subprocess.run(["pip", "install", "--upgrade", "transformers[torch,sentencepiece]==4.34.1"])
-
-import logging
 from time import perf_counter
 
 import gradio as gr
-import markdown
-# import lancedb
-from jinja2 import Environment, FileSystemLoader
 
-from gradio_app.backend.ChatGptInteractor import num_tokens_from_messages
-from gradio_app.backend.cross_encoder import rerank_with_cross_encoder
 from gradio_app.backend.query_llm import *
-from gradio_app.backend.embedders import EmbedderFactory
 
-from settings import *
 
-# Setting up the logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Set up the template environment with the templates directory
-env = Environment(loader=FileSystemLoader('gradio_app/templates'))
-
-# Load the templates directly from the environment
-context_template = env.get_template('context_template.j2')
-context_html_template = env.get_template('context_html_template.j2')
-
-# db = lancedb.connect(LANCEDB_DIRECTORY)
-db = None
-
-# Examples
-examples = [
-    'What is BERT?',
-    'Tell me about GPT',
-    'How to use accelerate in google colab?',
-    'What is the capital of China?',
-    'Why is the sky blue?',
-]
-
 
 def add_text(history, text):
     history = [] if history is None else history
@@ -50,65 +15,14 @@ def add_text(history, text):
     return history, gr.Textbox(value="", interactive=False)
 
 
-def …
-    logger.info('Retrieving documents...')
-    gr.Info('Start documents retrieval ...')
-    t = perf_counter()
-
-    table_name = f'{LANCEDB_TABLE_NAME}_{chunk}_{embed}'
-    table = db.open_table(table_name)
-
-    embedder = EmbedderFactory.get_embedder(embed)
-
-    query_vec = embedder.embed([query])[0]
-    documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME)
-    top_k_rank = TOP_K_RANK if cross_enc is not None else TOP_K_RERANK
-    documents = documents.limit(top_k_rank).to_list()
-    thresh_dist = thresh_distances[embed]
-    thresh_dist = max(thresh_dist, min(d['_distance'] for d in documents))
-    documents = [d for d in documents if d['_distance'] <= thresh_dist]
-    documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
-
-    t = perf_counter() - t
-    logger.info(f'Finished Retrieving documents in {round(t, 2)} seconds...')
-
-    logger.info('Reranking documents...')
-    gr.Info('Start documents reranking ...')
-    t = perf_counter()
-
-    documents = rerank_with_cross_encoder(cross_enc, documents, query)
-
-    t = perf_counter() - t
-    logger.info(f'Finished Reranking documents in {round(t, 2)} seconds...')
-    return documents
-
-
-def construct_messages(llm, documents, history):
-    msg_constructor = get_message_constructor(llm)
-    while len(documents) != 0:
-        context = context_template.render(documents=documents)
-        documents_html = [markdown.markdown(d) for d in documents]
-        context_html = context_html_template.render(documents=documents_html)
-        messages = msg_constructor(context, history)
-        num_tokens = num_tokens_from_messages(messages, 'gpt-3.5-turbo')  # todo for HF, it is approximation
-        if num_tokens + 512 < context_lengths[llm]:
-            break
-        documents.pop()
-    else:
-        raise gr.Error('Model context length exceeded, reload the page')
-    return documents, context_html, messages
-
-
-def bot(history, llm, cross_enc, chunk, embed):
+def bot(history):
     history[-1][1] = ""
     query = history[-1][0]
 
     if not query:
         raise gr.Error("Empty string was submitted")
 
-
-    # documents, context_html, messages = construct_messages(llm, documents, history)
-    context_html = ''
+    llm = 'gpt-4-turbo-preview'
     messages = get_message_constructor(llm)('', history)
 
     llm_gen = get_llm_generator(llm)
@@ -116,7 +30,7 @@ def bot(history, llm, cross_enc, chunk, embed):
     t = perf_counter()
     for part in llm_gen(messages):
         history[-1][1] += part
-        yield history
+        yield history
     else:
         t = perf_counter() - t
         logger.info(f'Finished Generating answer in {round(t, 2)} seconds...')
@@ -133,79 +47,22 @@ with gr.Blocks() as demo:
         bubble_full_width=False,
         show_copy_button=True,
         show_share_button=True,
-        height=…
+        height=800
     )
-
-    with gr.Row():
-        input_textbox = gr.Textbox(
-            scale=3,
-            show_label=False,
-            placeholder="Enter text and press enter",
-            container=False,
-        )
-        txt_btn = gr.Button(value="Submit text", scale=1)
-
-        chunk_name = gr.Radio(
-            choices=[
-                "md",
-                "txt",
-            ],
-            value="md",
-            label='Chunking policy'
-        )
-
-        embed_name = gr.Radio(
-            choices=[
-                "text-embedding-ada-002",
-                "sentence-transformers/all-MiniLM-L6-v2",
-            ],
-            value="text-embedding-ada-002",
-            label='Embedder'
-        )
-
-        cross_enc_name = gr.Radio(
-            choices=[
-                None,
-                "cross-encoder/ms-marco-TinyBERT-L-2-v2",
-                "cross-encoder/ms-marco-MiniLM-L-12-v2",
-            ],
-            value=None,
-            label='Cross-Encoder'
-        )
-
-        llm_name = gr.Radio(
-            choices=[
-                "gpt-4-1106-preview",
-                "gpt-4",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo",
-                "mistralai/Mistral-7B-Instruct-v0.1",
-                "tiiuae/falcon-180B-chat",
-                # "GeneZC/MiniChat-3B",
-            ],
-            value="gpt-4-1106-preview",
-            label='LLM'
-        )
-
-    # Examples
-    gr.Examples(examples, input_textbox)
-
     with gr.Column():
-
-
-
-
-
-
-
-
-
-    # Turn it back on
-    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [input_textbox], queue=False)
+        input_textbox = gr.Textbox(
+            interactive=True,
+            show_label=False,
+            placeholder="Enter text and press enter",
+            container=False,
+            autofocus=True,
+            lines=40,
+            max_lines=100,
+        )
 
     # Turn off interactivity while generating if you hit enter
     txt_msg = input_textbox.submit(add_text, [chatbot, input_textbox], [chatbot, input_textbox], queue=False).then(
-        bot, [chatbot…
+        bot, [chatbot], [chatbot])
 
     # Turn it back on
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [input_textbox], queue=False)
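For orientation, here is a minimal, self-contained sketch of the pattern app.py is reduced to by this commit: a streaming Chatbot above a tall Textbox, wired as submit → add_text → bot → re-enable. fake_llm_stream is a hypothetical stand-in for the generator returned by get_llm_generator; the component names and layout values mirror the diff.

import time

import gradio as gr


def fake_llm_stream(messages):
    # Hypothetical stand-in for the streaming generators in
    # gradio_app/backend/query_llm.py: yields the answer piece by piece.
    for token in "This is a streamed answer.".split():
        time.sleep(0.05)
        yield token + " "


def add_text(history, text):
    # Append the user turn and disable the textbox while generating.
    history = [] if history is None else history
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)


def bot(history):
    if not history[-1][0]:
        raise gr.Error("Empty string was submitted")
    history[-1][1] = ""
    for part in fake_llm_stream(history):
        history[-1][1] += part
        yield history  # each yield repaints the Chatbot with the partial answer


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(show_copy_button=True, show_share_button=True, height=800)
    with gr.Column():
        input_textbox = gr.Textbox(
            interactive=True,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
            autofocus=True,
            lines=40,
            max_lines=100,
        )

    # Chain: add the user message, stream the reply, re-enable the textbox.
    txt_msg = input_textbox.submit(
        add_text, [chatbot, input_textbox], [chatbot, input_textbox], queue=False,
    ).then(bot, [chatbot], [chatbot])
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [input_textbox], queue=False)

demo.queue()
demo.launch()

Because bot is a generator, Gradio streams each yielded history into the Chatbot, which produces the token-by-token effect; queue=False on the bookkeeping steps keeps only the generation itself on the queue.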
gradio_app/backend/query_llm.py
CHANGED
@@ -111,7 +111,7 @@ def construct_openai_messages(context, history):
 
 
 def get_message_constructor(llm_name):
-    if …
+    if 'gpt' in llm_name:
         return construct_openai_messages
     if llm_name in ['mistralai/Mistral-7B-Instruct-v0.1', "tiiuae/falcon-180B-chat", "GeneZC/MiniChat-3B"]:
         return construct_mistral_messages
@@ -119,7 +119,7 @@ def get_message_constructor(llm_name):
 
 
 def get_llm_generator(llm_name):
-    if …
+    if 'gpt' in llm_name:
         cgi = ChatGptInteractor(
             model_name=llm_name, stream=True,
             # max_tokens=None, temperature=0,
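On the query_llm.py side, the old conditions are truncated in this view, but the new 'gpt' in llm_name checks route any OpenAI-style model id to the OpenAI message constructor and chat generator. A sketch of the resulting dispatch, with stub bodies so it runs standalone (the final raise is an assumption; the real constructors live in gradio_app/backend/query_llm.py):

def construct_openai_messages(context, history):
    ...  # stub: builds the OpenAI chat-completion message list


def construct_mistral_messages(context, history):
    ...  # stub: builds the instruction-format prompt for the HF-hosted models


def get_message_constructor(llm_name):
    # Every OpenAI chat model id contains 'gpt' ('gpt-4', 'gpt-3.5-turbo',
    # 'gpt-4-turbo-preview', ...), so a substring check covers newly added
    # OpenAI models without maintaining an explicit list.
    if 'gpt' in llm_name:
        return construct_openai_messages
    if llm_name in ['mistralai/Mistral-7B-Instruct-v0.1', "tiiuae/falcon-180B-chat", "GeneZC/MiniChat-3B"]:
        return construct_mistral_messages
    raise ValueError(f'Unknown LLM name: {llm_name}')  # assumed fallback, not shown in the diff

The tradeoff is that the substring check would also match a non-OpenAI model whose name happens to contain 'gpt'; with the models wired up in this app, that case does not arise.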