File size: 3,334 Bytes
a2e759c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d42f1d
a2e759c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import html
import logging

import gradio as gr

from app_utils import evaluate_prompt, get_split

logging.basicConfig(level=logging.INFO)


with gr.Blocks(title=f"Prompting Challenge ({get_split()}") as demo:
    gr.Markdown(
        f"""
        # Prompting Challenge
        ### ({get_split()})
        """ + """
        The goal of this challenge is to prompt GPT-4 to "unscramble" a sentence.

        The input is a sentence with scrambled word order, e.g.: *"are How ? you"*
        
        GPT-4 should identify the original sentence, e.g.: *"How are you?"*

        Enter your prompt template here. Use `{% shuffled_sentence %}` at the place where you want the shuffled sentence to be inserted.
        """
    )
    
    input_text = gr.Textbox(
        lines=10, 
        label="Prompt Template", 
        value="Unscramble the following sentence: {% shuffled_sentence %}"
    )
    submit_button = gr.Button("Submit")
    results_output = gr.HTML(label="Results")

    def update_results(prompt):
        result_tuples = list(evaluate_prompt(prompt))
        if result_tuples:
            total_score = sum(item_score for _, _, _, item_score in result_tuples)
            score = total_score / len(result_tuples)
        else:
            score = 0
        html_output = "<dl style='font-family: Arial, sans-serif;'>"
        html_output += f"<h2 style='color: #333; margin-top: 20px; margin-bottom: 20px;'>Accuracy: {100 * score:.1f}%</h2>"
        newline = '\n'
        for index, (original, prompt, response, item_score) in enumerate(result_tuples, 1):
            background_color = "#fff4ea" if item_score < 0.5 else "#e4ffe4" if item_score > 0.9 else "whitesmoke"
            html_output += f"""
                <div style='background-color: {background_color}; padding: 10px; margin-bottom: 20px;'>
                    <h3 style='color: #333; margin-top: 0;'>Test item #{index}</h3>
                    <dt style='padding: 5px;'>
                        <span style='font-weight: 600;'>Original Sentence:</span>
                    </dt>
                    <dd style='margin-left: 20px; padding: 5px;'>{original.replace(newline, "<br>")}</dd>
                    
                    <dt style='padding: 5px;'>
                        <span style='font-weight: 600;'>Prompt:</span>
                    </dt>
                    <dd style='margin-left: 20px; padding: 5px;'>{prompt.replace(newline, "<br>")}</dd>
                    
                    <dt style='padding: 5px;'>
                        <span style='font-weight: 600;'>Response by GPT-4:</span>
                    </dt>
                    <dd style='margin-left: 20px; padding: 5px;font-style: italic;'>{response.replace(newline, "<br>")}</dd>
                    <dt style='padding: 5px;'>
                        <span style='font-weight: 600;'>Score:</span>
                    </dt>
                    <dd style='margin-left: 20px; padding: 5px;'>
                        <span style='color: #333;'>{100 * item_score:.1f}%</span>
                    </dd>
                </div>
            """
        html_output += "</dl>"
        return html_output

    submit_button.click(
        fn=update_results,
        inputs=[input_text],
        outputs=[results_output]
    )

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
    # Alternative: demo.launch(share=True) additionally requests a public share link.