File size: 3,170 Bytes
2df9f11
 
81766be
2df9f11
e8158c1
 
2df9f11
 
 
 
 
 
 
e8158c1
 
 
 
 
 
 
 
 
 
2df9f11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8158c1
2df9f11
 
 
 
e8158c1
 
 
2df9f11
 
 
 
 
 
 
 
 
 
 
81766be
2df9f11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8158c1
 
 
 
 
 
 
 
 
2df9f11
 
 
 
e8158c1
2df9f11
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from green_score import GREEN
import spaces


# Model used when the caller supplies no (or a blank) model name.
_DEFAULT_GREEN_MODEL = "StanfordAIMI/GREEN-radllama2-7b"

# Columns of the result row that are not shown in the metrics table.
_NON_METRIC_COLUMNS = frozenset({"reference", "predictions", "green_analysis"})


@spaces.GPU(duration=120)  # Add the GPU decorator for functions that need GPU access
def run_green(ref_text, hyp_text, model_name=_DEFAULT_GREEN_MODEL):
    """Score a single reference/hypothesis report pair with GREEN.

    Args:
        ref_text: Reference report text; surrounding whitespace is stripped.
        hyp_text: Hypothesis report text; surrounding whitespace is stripped.
        model_name: Model identifier passed to ``GREEN``. A blank or missing
            value falls back to the default model.

    Returns:
        A ``(green_analysis_text, table_data)`` tuple. ``green_analysis_text``
        is the ``"green_analysis"`` value of the first result row, and
        ``table_data`` is a list of ``[column, value]`` pairs for every other
        column of that row except ``"reference"`` and ``"predictions"``.

    Raises:
        gr.Error: If either report is empty after stripping, so the UI shows
            a message instead of running the model on blank input.
    """
    ref_text = (ref_text or "").strip()
    hyp_text = (hyp_text or "").strip()
    if not ref_text or not hyp_text:
        raise gr.Error(
            "Please provide both a reference report and a hypothesis report."
        )

    # The model-name textbox can be cleared by the user; use the default then.
    model_name = (model_name or "").strip() or _DEFAULT_GREEN_MODEL

    green_scorer = GREEN(model_name, output_dir=".")
    # Only the per-pair DataFrame is displayed; the other outputs are unused.
    _mean, _std, _green_score_list, _summary, result_df = green_scorer(
        [ref_text], [hyp_text]
    )

    green_analysis_text = result_df["green_analysis"].iloc[0]

    # Prepare data for the nx2 table
    table_data = [
        [key, value]
        for key, value in result_df.iloc[0].to_dict().items()
        if key not in _NON_METRIC_COLUMNS
    ]

    return green_analysis_text, table_data


# Preset report pairs selectable in the UI, keyed by their display label.
# Each entry maps "ref" to the reference report and "hyp" to the hypothesis.
examples = {
    label: {"ref": reference, "hyp": hypothesis}
    for label, reference, hypothesis in (
        (
            "Example 1",
            "Interstitial opacities without changes.",
            "Interstitial opacities at bases without changes.",
        ),
        (
            "Example 2",
            "The heart size is normal. Lungs are clear without any infiltrates.",
            "The heart size is mildly enlarged. Mild infiltrates in the left upper lobe.",
        ),
        (
            "Example 3",
            "Lung volumes are low, causing bronchovascular crowding. The cardiomediastinal silhouette is unremarkable.",
            "Endotracheal tubes have been removed. Pulmonary aeration is slightly improved with no pleural effusions.",
        ),
    )
}


def update_fields(choice):
    """Return updates for the reference and hypothesis textboxes.

    For ``"Custom"`` both boxes are cleared and made editable. For any other
    choice the boxes are filled from the matching ``examples`` entry and made
    read-only.

    Returns:
        A pair of ``gr.update`` objects: (reference box, hypothesis box).
    """
    if choice != "Custom":
        preset = examples[choice]
        return (
            gr.update(value=preset["ref"], interactive=False),
            gr.update(value=preset["hyp"], interactive=False),
        )

    blank_editable = {"value": "", "interactive": True}
    return gr.update(**blank_editable), gr.update(**blank_editable)


# Build the single-page UI: input selector, report boxes, model name, outputs.
with gr.Blocks(title="GREEN Score Evaluation Demo") as demo:
    gr.Markdown("# GREEN Score Evaluation Demo")
    gr.Markdown("This demo evaluates a single pair of reference and hypothesis reports to compute the GREEN score.")

    # Selector between free-form input and the preset example pairs.
    with gr.Row():
        choice = gr.Radio(
            choices=["Custom", *examples],
            value="Custom",
            label="Choose Input Type",
            interactive=True,
        )

    ref_input = gr.Textbox(lines=3, label="Reference Report")
    hyp_input = gr.Textbox(lines=3, label="Hypothesis Report")

    # Refill (or clear) both report boxes whenever the selection changes.
    choice.change(fn=update_fields, inputs=choice, outputs=[ref_input, hyp_input])

    model_name_input = gr.Textbox(
        value="StanfordAIMI/GREEN-radllama2-7b",
        label="Model Name",
        placeholder="Enter the HuggingFace model name",
    )

    run_button = gr.Button("Compute GREEN Score")

    # Outputs: the textual analysis and a two-column metric table.
    green_analysis_output = gr.Textbox(lines=10, label="GREEN Analysis", interactive=False)
    table_output = gr.DataFrame(headers=["Metric", "Value"], label="Scores and Errors")

    # Run the scorer on the current inputs when the button is pressed.
    run_button.click(
        fn=run_green,
        inputs=[ref_input, hyp_input, model_name_input],
        outputs=[green_analysis_output, table_output],
    )

demo.launch()