File size: 3,134 Bytes
89dc8b2
 
 
1132b50
89dc8b2
e960d63
6c9d07b
718e159
89dc8b2
 
 
 
8f9ef95
91bb24e
89dc8b2
 
8f9ef95
 
718e159
6c9d07b
 
 
8f9ef95
 
718e159
 
6c9d07b
 
67bfb80
8f9ef95
 
 
1132b50
 
718e159
 
 
1132b50
 
8f9ef95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6323bc8
 
8f9ef95
78aafcc
e960d63
8f9ef95
 
 
718e159
 
8f9ef95
6323bc8
 
 
 
 
718e159
8f9ef95
6323bc8
 
 
 
 
8f9ef95
 
6323bc8
e960d63
6323bc8
e960d63
 
89dc8b2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from fastapi import FastAPI
from pydantic import BaseModel
import faq as faq
import util as util
import uvicorn
import gradio as gr
from typing import List, Optional
from fastapi.responses import JSONResponse

# ASGI application object. The "/api" routes defined below are registered on
# it; further down the module it is rebound to the result of
# gr.mount_gradio_app(...), which also attaches the Gradio UI at "/".
app = FastAPI()


class Request(BaseModel):
    """Request body shared by the POST and PUT handlers of ``/api``."""

    # Query text handed to faq.similarity_search; defaults to "?" when omitted.
    question: Optional[str] = "?"
    # Sheet location, passed as the first argument to faq.load_vectordb.
    sheet_url: str
    # Name of the column passed as the second argument to faq.load_vectordb.
    page_content_column: str
    # Forwarded as the ``k`` keyword of faq.similarity_search
    # (presumably the number of hits to return -- confirm in faq).
    k: Optional[int] = 20
    # When truthy, the handlers delete the current collection and load it again.
    reload_collection: Optional[bool] = False
    # When set, the handlers switch util.SPLIT_PAGE_BREAKS on, and POST also
    # runs the results through util.remove_duplicates_by_column on this column.
    id_column: Optional[str] = None
    # When set, assigned as-is to util.SYNONYMS by the handlers.
    synonyms: Optional[List[List[str]]] = None


@app.post("/api")
async def post_api(request: Request) -> JSONResponse:
    """Run a similarity search for ``request.question`` and return the hits.

    Args:
        request: Search parameters; see the ``Request`` model.

    Returns:
        A ``JSONResponse`` wrapping ``util.dataframe_to_dict`` applied to the
        result frame. When ``request.id_column`` is set, the frame is first
        passed through ``util.remove_duplicates_by_column`` on that column.
    """
    dedupe_by_id = request.id_column is not None

    # NOTE(review): these are module-level settings on ``util``. They are only
    # ever switched on / overwritten here and never restored, so whatever one
    # request sets stays in effect for later requests.
    if dedupe_by_id:
        util.SPLIT_PAGE_BREAKS = True
    if request.synonyms is not None:
        util.SYNONYMS = request.synonyms

    url, column = request.sheet_url, request.page_content_column
    store = faq.load_vectordb(url, column)
    if request.reload_collection:
        # Drop the current collection, then load it again from the sheet.
        faq.delete_vectordb_current_collection(store)
        store = faq.load_vectordb(url, column)

    hits = faq.similarity_search(store, request.question, k=request.k)
    frame = util.transform_documents_to_dataframe(hits)
    if dedupe_by_id:
        frame = util.remove_duplicates_by_column(frame, request.id_column)

    payload = util.dataframe_to_dict(frame)
    return JSONResponse(payload)


@app.put("/api")
async def put_api(request: Request) -> bool:
    """Load the vector DB for the sheet and optionally rebuild its collection.

    Args:
        request: Uses ``sheet_url``, ``page_content_column``,
            ``reload_collection``, ``id_column`` and ``synonyms``.

    Returns:
        ``True`` when ``request.reload_collection`` was truthy and the
        collection was deleted and loaded again; ``False`` otherwise.
    """
    # NOTE(review): module-level settings on ``util``; set here and never
    # restored, so they remain in effect after this request finishes.
    if request.id_column is not None:
        util.SPLIT_PAGE_BREAKS = True
    if request.synonyms is not None:
        util.SYNONYMS = request.synonyms

    url, column = request.sheet_url, request.page_content_column
    store = faq.load_vectordb(url, column)

    if not request.reload_collection:
        return False

    # Drop the current collection, then load it again from the sheet.
    faq.delete_vectordb_current_collection(store)
    faq.load_vectordb(url, column)
    return True


@app.delete("/api")
async def delete_vectordb_api() -> None:
    """Handle ``DELETE /api`` by delegating to ``faq.delete_vectordb()``."""
    faq.delete_vectordb()
    return None


def ask(sheet_url: str, page_content_column: str, k: int, reload_collection: bool, question: str):
    """Gradio click handler: search the sheet's vector DB for ``question``.

    Args:
        sheet_url: Sheet location, passed to ``faq.load_vectordb``.
        page_content_column: Column name, passed to ``faq.load_vectordb``.
        k: Forwarded as the ``k`` keyword of ``faq.similarity_search``.
        reload_collection: When truthy, the current collection is deleted and
            loaded again before searching.
        question: Query text for the similarity search.

    Returns:
        A 2-tuple ``(result, checkbox_update)``: ``result`` is the output of
        ``util.dataframe_to_dict`` for the hits, and ``checkbox_update`` sets
        the value of the second output component (the "Reload Collection?"
        checkbox in the UI wiring) back to ``False``.
    """
    # The UI path always runs with page-break splitting switched off.
    util.SPLIT_PAGE_BREAKS = False
    vectordb = faq.load_vectordb(sheet_url, page_content_column)
    if reload_collection:
        # Drop the current collection, then load it again from the sheet.
        faq.delete_vectordb_current_collection(vectordb)
        vectordb = faq.load_vectordb(sheet_url, page_content_column)
    documents = faq.similarity_search(vectordb, question, k=k)
    df_doc = util.transform_documents_to_dataframe(documents)
    # gr.update(...) is the component-agnostic way to change an output's
    # value. The per-component ``gr.Checkbox.update`` classmethod was
    # deprecated in late Gradio 3.x and removed in Gradio 4, so calling it
    # raises AttributeError on current releases.
    return util.dataframe_to_dict(df_doc), gr.update(value=False)


# Gradio front end for ask(): one input component per ask() parameter, an
# "Ask" button, and a JSON panel for the result.
with gr.Blocks() as block:
    sheet_url = gr.Textbox(label="Google Sheet URL")
    page_content_column = gr.Textbox(label="Question Column")
    k = gr.Slider(1, 30, step=1, label="K")
    reload_collection = gr.Checkbox(label="Reload Collection?")
    question = gr.Textbox(label="Question")
    ask_button = gr.Button("Ask")
    answer_output = gr.JSON(label="Answer")
    # The inputs list follows ask()'s positional parameter order. The checkbox
    # is also listed as an output so that ask()'s second return value can
    # change it after each click.
    ask_button.click(
        ask,
        inputs=[sheet_url, page_content_column, k, reload_collection, question],
        outputs=[answer_output, reload_collection]
    )

# Attach the Gradio UI to the FastAPI app at "/" and rebind ``app`` to the
# object returned by gr.mount_gradio_app.
app = gr.mount_gradio_app(app, block, path="/")


if __name__ == "__main__":
    # When run as a script, serve the combined app with uvicorn on port 7860;
    # host "0.0.0.0" makes it listen on all network interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)