Spaces:
Running
Running
File size: 3,994 Bytes
b4b5bdf e9698e9 e0e448c e9698e9 3fcc7da b4b5bdf 51727c4 e9698e9 e0e448c e9698e9 1203b67 e9698e9 2785052 69a190d 2785052 e0e448c e9698e9 3fcc7da e0e448c e9698e9 3fcc7da e9698e9 3fcc7da e9698e9 3fcc7da e0e448c e9698e9 b4b5bdf e9698e9 a3a378d e9698e9 3fcc7da e9698e9 e0e448c e9698e9 e0e448c e9698e9 7710388 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import logging
import os
from typing import Optional
import gradio as gr
import pandas as pd
from buster.completers import Completion
import cfg
from cfg import setup_buster
# Build the question-answering backend once, at import time, from the
# configuration object exposed by the local ``cfg`` module.
buster = setup_buster(cfg.buster_cfg)

# Raise the httpx logger to WARNING: its lower-level records are spammy and
# uninformative.
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Source names offered in the "Select Sources" dropdown; the dropdown also
# uses this list as its initial value, so every source starts selected.
AVAILABLE_SOURCES = ["towardsai", "wikipedia", "langchain_course"]
def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a markdown list of sources.

    Each distinct title is listed once, keeping its highest-scoring row, and
    the similarity score is displayed multiplied by 100.

    Args:
        matched_documents: Frame providing ``title``, ``url`` and
            ``similarity_to_answer`` columns. The score is presumably a 0-1
            fraction (confirm against the producer). The frame itself is
            left unmodified.

    Returns:
        The formatted markdown message, or ``""`` when the frame has no rows.
    """
    if len(matched_documents) == 0:
        return ""

    # NOTE(review): the emoji in these literals look mis-encoded; confirm
    # against the original file before changing them.
    documents_answer_template: str = "π Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
    document_template: str = "[π {document.title}]({document.url}), relevance: {document.similarity_to_answer:2.1f} %"
    footnote: str = "I'm a bot π€ and not always perfect."

    # Scale on a copy (``assign`` returns a new frame). Writing the column
    # back onto the argument would rescale the caller's data on every call.
    ranked = (
        matched_documents.assign(
            similarity_to_answer=matched_documents.similarity_to_answer * 100
        )
        # Sort best-first so that dropping duplicate titles keeps the
        # highest-ranking row for each title.
        .sort_values("similarity_to_answer", ascending=False)
        .drop_duplicates("title", keep="first")
    )

    documents = "\n".join(
        document_template.format(document=document)
        for _, document in ranked.iterrows()
    )
    return documents_answer_template.format(documents=documents, footnote=footnote)
def add_sources(history, completion):
    """Append a sources message to the chat when the answer was relevant.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs; it is extended in place.
        completion: Object exposing ``answer_relevant`` and
            ``matched_documents``.

    Returns:
        The same ``history`` list, with one extra bot-only entry when
        ``completion.answer_relevant`` is truthy.
    """
    if not completion.answer_relevant:
        return history

    sources_message = format_sources(completion.matched_documents)
    # ``None`` in the user slot makes this a bot-only chat entry.
    history.append([None, sources_message])
    return history
def user(user_input, history):
    """Adds user's question immediately to the chat.

    Returns:
        A pair ``(textbox_value, new_history)``: an empty string to clear the
        input box, and a new history list ending with the question and a
        ``None`` placeholder for the pending answer.
    """
    pending_turn = [user_input, None]
    updated_history = history + [pending_turn]
    return "", updated_history
def get_empty_source_completion(user_input):
    """Build the canned completion used when no source is selected.

    Args:
        user_input: The question the user asked.

    Returns:
        A ``Completion`` carrying a fixed reminder message, an empty
        documents frame, and ``error=False``.
    """
    no_source_message = "You have to select at least one source from the dropdown menu."
    no_documents = pd.DataFrame()
    return Completion(
        user_input=user_input,
        answer_text=no_source_message,
        matched_documents=no_documents,
        error=False,
    )
def get_answer(history, sources: Optional[list[str]] = None):
    """Stream the bot's answer to the latest question into the chat history.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs; the last pair holds the question to answer and is
            updated in place.
        sources: Names of the sources to search. ``None`` or an empty list
            means nothing is selected and yields the canned reminder instead
            of querying the backend.

    Yields:
        ``(history, completion)`` after each token is appended to the last
        bot message.
    """
    user_input = history[-1][0]

    # ``not sources`` covers both an empty selection and the ``None``
    # default; ``len(None)`` would raise ``TypeError``.
    if not sources:
        completion = get_empty_source_completion(user_input)
    else:
        completion = buster.process_input(user_input, sources=sources)

    # Start from an empty bot message and grow it token by token so the UI
    # can show the answer as it streams.
    history[-1][1] = ""
    for token in completion.answer_generator:
        history[-1][1] += token
        yield history, completion
# Gradio UI: page layout, event wiring and app launch.
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost; confirm the Row groupings against the real file.
block = gr.Blocks()

with block:
    with gr.Row():
        gr.Markdown(
            "<h3><center>Buster π€: A Question-Answering Bot for your documentation</center></h3>"
        )

    source_selection = gr.Dropdown(
        choices=AVAILABLE_SOURCES,
        label="Select Sources",
        value=AVAILABLE_SOURCES,
        multiselect=True,
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        question = gr.Textbox(
            label="What's your question?",
            placeholder="Ask a question to AI stackoverflow here...",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary")

    examples = gr.Examples(
        examples=cfg.example_questions,
        inputs=question,
    )

    # The link needs an explicit scheme: without "https://" the markdown
    # target is treated as a path relative to the app's own URL.
    gr.Markdown(
        "This application uses ChatGPT to search the docs for relevant info and answer questions. "
        "\n\n### Powered by [Buster π€](https://www.github.com/jerpint/buster)"
    )

    # Holds the completion produced by get_answer so add_sources can read it.
    response = gr.State()

    # Clicking "Send" and pressing Enter in the textbox run the same chain:
    # echo the question, stream the answer, then append the sources.
    for trigger in (submit.click, question.submit):
        trigger(user, [question, chatbot], [question, chatbot], queue=False).then(
            get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, response]
        ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])

block.queue(concurrency_count=16)
block.launch(debug=True, share=False)
|