File size: 3,362 Bytes
9a7ccba
 
9beb9e3
 
9a7ccba
55e40d9
 
 
 
 
 
 
 
9a7ccba
55e40d9
 
 
 
dc15a5c
55e40d9
 
 
 
 
9beb9e3
 
 
 
 
 
 
 
 
 
 
 
 
 
55e40d9
9a7ccba
 
55e40d9
 
 
 
 
9a7ccba
 
55e40d9
 
 
 
9beb9e3
 
 
55e40d9
 
 
 
 
 
 
 
 
 
9beb9e3
55e40d9
 
 
 
 
 
 
 
 
 
dc15a5c
 
 
 
 
 
 
 
55e40d9
 
dc15a5c
55e40d9
dc15a5c
55e40d9
8fae716
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
from transformers import pipeline
import PyPDF2
import markdown

# Selectable question-answering models: dropdown label -> model identifier
# handed to transformers.pipeline(). Nothing is loaded here; pipelines are
# built on first use and kept in ``loaded_models``.
models = {
    "distilbert-base-uncased-distilled-squad": (
        "distilbert-base-uncased-distilled-squad"
    ),
    "roberta-base-squad2": "deepset/roberta-base-squad2",
    "bert-large-uncased-whole-word-masking-finetuned-squad": (
        "bert-large-uncased-whole-word-masking-finetuned-squad"
    ),
    "albert-base-v2": "twmkn9/albert-base-v2-squad2",
    "xlm-roberta-large-squad2": "deepset/xlm-roberta-large-squad2",
}

# Cache of already-built pipelines, keyed by the same labels as ``models``.
loaded_models = {}

def load_model(model_name):
    """Return the question-answering pipeline registered under ``model_name``.

    The pipeline is built the first time a name is requested and stored in
    ``loaded_models``; later calls with the same name return the stored
    object. ``model_name`` must be a key of ``models``; otherwise the lookup
    raises ``KeyError``.
    """
    try:
        return loaded_models[model_name]
    except KeyError:
        pass  # not built yet; fall through and create it

    qa_pipeline = pipeline("question-answering", model=models[model_name])
    loaded_models[model_name] = qa_pipeline
    return qa_pipeline

def _read_document_text(file):
    """Return the text of an uploaded document, or "" when nothing was uploaded.

    ``file`` is the value delivered by the Gradio ``File`` input. Depending on
    the Gradio version this is either a filesystem path (``str``) or a
    temp-file wrapper exposing that path as ``.name``; both are handled by
    resolving the path and opening it here.

    The format is chosen by file extension (case-insensitive):
    ``.pdf`` is parsed with PyPDF2, ``.md`` is rendered through
    ``markdown.markdown`` (so the result is HTML), anything else is read as
    UTF-8 text.

    Raises:
        TypeError: if no path can be derived from ``file``.
    """
    if file is None:
        return ""

    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not isinstance(path, str):
        raise TypeError(f"Unsupported upload object: {type(file).__name__}")

    lowered = path.lower()
    if lowered.endswith(".pdf"):
        reader = PyPDF2.PdfReader(path)
        # Join pages with a newline so words at page boundaries do not fuse;
        # tolerate pages that yield no text.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    with open(path, encoding="utf-8") as handle:
        text = handle.read()
    if lowered.endswith(".md"):
        text = markdown.markdown(text)
    return text


def answer_question(model_name, file, question):
    """Answer ``question`` from the uploaded document using the chosen model.

    Args:
        model_name: key of ``models`` selecting the pipeline to use.
        file: upload from the Gradio ``File`` input, or ``None``.
        question: the user's question.

    Returns:
        A 3-tuple of strings ``(answer, score, explanation)``. When the
        question is blank or the document yields no text, ``answer`` and
        ``score`` are empty and ``explanation`` says what is missing; the
        model is not loaded in that case.
    """
    # Validate inputs before the (potentially slow) model load; the pipeline
    # itself rejects empty question/context with an exception.
    if not question or not question.strip():
        return "", "", "Please enter a question."

    context = _read_document_text(file)
    if not context.strip():
        return (
            "",
            "",
            "No text could be read from the document. "
            "Please upload a non-empty .txt, .pdf or .md file.",
        )

    model = load_model(model_name)
    result = model(question=question, context=context)
    score = result['score']

    # Explain score
    score_explanation = f"The confidence score ranges from 0 to 1, where a higher score indicates higher confidence in the answer's correctness. In this case, the score is {score:.2f}. A score closer to 1 implies the model is very confident about the answer."

    return result['answer'], f"{score:.2f}", score_explanation

# Define the Gradio interface: model selector, document upload, question box,
# three result fields and a status line, all wired to a single Submit button.
with gr.Blocks() as interface:
    gr.Markdown(
        """
        # Question Answering System
        Upload a document (text, PDF, or Markdown) and ask questions to get answers based on the context.
        
        **Supported File Types**: `.txt`, `.pdf`, `.md`
        """)

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(models.keys()),
            label="Select Model",
            value="distilbert-base-uncased-distilled-squad"
        )

    with gr.Row():
        # "text" is a file category; PDF and Markdown are given as dotted
        # extensions so the upload filter recognises them.
        file_input = gr.File(label="Upload Document", file_types=["text", ".pdf", ".md"])
        question_input = gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question")

    with gr.Row():
        answer_output = gr.Textbox(label="Answer")
        score_output = gr.Textbox(label="Confidence Score")
        explanation_output = gr.Textbox(label="Score Explanation")

    with gr.Row():
        submit_button = gr.Button("Submit")

    # Status line; it is updated by listing it among the click outputs below.
    status = gr.Markdown(value="")

    def on_submit(model_name, file, question):
        """Run the QA model and return ``(answer, score, explanation)``."""
        return answer_question(model_name, file, question)

    def _on_submit_with_status(model_name, file, question):
        """Click handler: the ``on_submit`` results plus a status message.

        A component is only refreshed through a handler's return values, so
        the status text is returned as a fourth output instead of calling
        ``status.update(...)``, whose result was previously discarded.
        """
        answer, score, explanation = on_submit(model_name, file, question)
        return answer, score, explanation, "Done."

    submit_button.click(
        _on_submit_with_status,
        inputs=[model_dropdown, file_input, question_input],
        outputs=[answer_output, score_output, explanation_output, status],
    )

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()