File size: 2,049 Bytes
88b4edc
 
 
 
 
 
 
45d5210
88b4edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88e61ea
88b4edc
88e61ea
 
88b4edc
 
62b355e
 
 
88b4edc
45d5210
 
 
 
 
 
 
 
 
 
62b355e
 
88b4edc
88e61ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45d5210
 
 
 
 
88b4edc
62b355e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Gradio demo app: retrieves candidate passages from Elasticsearch for a
# question and reranks them with a cross-encoder model.

import logging
import os

import gradio as gr
import pandas as pd

from reranker.reranker import CrossEncReranker
from retriever.es_retriever import ESRetriever
from utils.preprocessing import question_to_statement


# Elasticsearch connection settings. All four variables are required: a
# missing one raises KeyError while this module is being imported.
ES_HOST, ES_INDEX_NAME, ES_USERNAME, ES_PASSWORD = (
    os.environ[var_name]
    for var_name in ("ES_HOST", "ES_INDEX_NAME", "ES_USERNAME", "ES_PASSWORD")
)

# Name of the cross-encoder checkpoint handed to CrossEncReranker.
RERANKER_MODEL_NAME = (
    "douglasfaisal/granularity-legal-reranker-cross-encoder-indobert-base-p2"
)

# Module-level instances created once at import and reused by every request.
es_retriever_client = ESRetriever(
    ES_HOST,
    ES_INDEX_NAME,
    ES_USERNAME,
    ES_PASSWORD,
)
# NOTE(review): 512 is presumably the maximum sequence length -- confirm
# against the CrossEncReranker constructor.
cross_enc_reranker = CrossEncReranker(RERANKER_MODEL_NAME, 512)

def _not_found_outputs():
    """Return the placeholder (label, text, table) triple for "no result"."""
    empty_table = pd.DataFrame(columns=["Rank", "Reference", "Text"])
    return "-", "(Result Not Found)", empty_table


def retrieve_and_rerank(question: str, example: str):
    """Retrieve passages for a question, rerank them, and format the outputs.

    Args:
        question: Free-text question typed by the user. May be empty or None.
        example: Example question picked from the dropdown; used only when
            ``question`` is empty. May also be empty or None.

    Returns:
        A 3-tuple ``(reference, text, table)``: the reference string and text
        of the top-ranked result, and a DataFrame with columns ``Rank``,
        ``Reference`` and ``Text`` listing every reranked result in order.
        When there is no question at all, the pipeline fails, or it yields no
        results, the tuple is ``("-", "(Result Not Found)", <empty table>)``.
        Three values are always returned so the result matches the three
        output components the interface declares.
    """
    # Prefer the typed question; fall back to the dropdown example.
    if not question:
        question = example
    if not question:
        # Nothing to search for, so do not touch the backends at all.
        return _not_found_outputs()

    try:
        query = question_to_statement(question)
        retrieval_results = es_retriever_client.retrieve(query)
        ranked = list(cross_enc_reranker.rerank(query, retrieval_results))
        if not ranked:
            return _not_found_outputs()

        law_refs = [hit.generate_string() for hit in ranked]
        law_texts = [hit.text for hit in ranked]
    except Exception:
        # UI boundary: keep the demo responsive, but record what went wrong
        # instead of silently discarding the error.
        logging.getLogger(__name__).exception(
            "retrieve_and_rerank failed for question %r", question
        )
        return _not_found_outputs()

    ranking = pd.DataFrame({
        'Rank': range(1, len(law_refs) + 1),
        'Reference': law_refs,
        'Text': law_texts,
    })
    return law_refs[0], law_texts[0], ranking

# Web UI: a free-text question box and a dropdown of example questions are
# passed, in that order, to retrieve_and_rerank. Its three return values fill
# the label, text and dataframe outputs, also in order.
demo = gr.Interface(
    fn=retrieve_and_rerank,
    inputs=[
        "text",
        gr.Dropdown(
            [
                "Apa yang dimaksud dengan pemberi kerja?",
                "Berapa paling lama waktu kerja lembur?",
                "Apa bentuk pendapatan non-upah?"
            ]
        )
    ],
    outputs=[
        "label",
        "text",
        "dataframe"
    ],
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()