File size: 23,984 Bytes
3d29480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa1be51
3d29480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f152358
3d29480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f152358
3d29480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
import os
# Install the third-party packages at start-up, before the imports further
# down that need them (dashscope, sympy, gradio).
os.system('pip install dashscope')
os.system('pip install sympy')
os.system('pip install "gradio==3.40.1"')
import random
import re
from http import HTTPStatus

import dashscope
import gradio as gr
import sympy

# The DashScope API key comes from the DASHSCOPE_API_KEY environment variable;
# os.getenv yields None when that variable is not set.
dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')

# Challenge definitions and validation logic


# Helper - check whether a number is prime
def is_prime(num):
    """Return the result of ``sympy.isprime`` for ``num``."""
    verdict = sympy.isprime(num)
    return verdict


# Helper - get the next prime
def next_prime(num):
    """Return the result of ``sympy.nextprime`` for ``num``."""
    following = sympy.nextprime(num)
    return following


# Helper - check whether a number is a perfect square
def is_square(n):
    """Report whether the sympy square root of ``n`` is an integer."""
    root = sympy.sqrt(n)
    return root.is_integer


# Helper - get the square root
def get_square_root(n):
    """Return the sympy square root of ``n`` converted to ``int``."""
    root = sympy.sqrt(n)
    return int(root)


def reverse_en(input):
    """Return ``input`` with its whitespace-separated words in reverse order.

    The words are rejoined with single spaces, so runs of whitespace collapse
    and leading/trailing whitespace is dropped.
    """
    words = input.split()
    words.reverse()
    return ' '.join(words)

def validate_palindrome_invariance(origin_input, response):
    """
    Validator: check that the model gives the same answer whether the
    question is asked forwards or with its words in reverse order.

    :param origin_input: the original question
    :param response: the model's answer to the original question, or None
        when no answer was obtained
    :return: True if the question is not a palindrome and both answers are
        equal (ignoring surrounding whitespace), otherwise False
    """
    # generate_response() returns None when the API call fails; a missing
    # answer can never satisfy the challenge.
    if response is None:
        return False

    # The challenge asks for a non-palindrome question. Reject questions that
    # is_palindrome() accepts, and questions whose word-reversed form is the
    # very same question (asking it "backwards" would change nothing).
    normalized_question = ' '.join(origin_input.split())
    reversed_question = reverse_en(origin_input)
    if is_palindrome(origin_input) or normalized_question == reversed_question:
        return False

    # Ask the word-reversed question
    reversed_response = generate_response(reversed_question)
    if reversed_response is None:
        return False

    # Both answers must match
    return response.strip() == reversed_response.strip()


def is_palindrome(sentence):
    """Tell whether ``sentence`` reads the same in both directions.

    Letter case is ignored, and every character other than the ASCII letters
    and digits is discarded before the comparison.
    """
    lowered = sentence.lower()
    kept = re.sub(r'[^A-Za-z0-9]', '', lowered)
    mirrored = ''.join(reversed(kept))
    return kept == mirrored

def validate_palindrome_inverse(origin_input, response):
    """
    Validator: check that, when the question is asked forwards and with its
    words in reverse order, the two answers are word-order reverses of each
    other, while neither the question nor the answer is a palindrome.

    :param origin_input: the original question
    :param response: the model's answer to the original question, or None
        when no answer was obtained
    :return: True if the question and the answer are both non-palindromes and
        the answer equals the word-reversed answer to the word-reversed
        question (whitespace-normalized), otherwise False
    """
    # generate_response() returns None when the API call fails.
    if response is None:
        return False

    # The original question must not be a palindrome, neither character-wise
    # (is_palindrome) nor word-wise (its word reversal is the same question).
    normalized_question = ' '.join(origin_input.split())
    reversed_question = reverse_en(origin_input)
    if is_palindrome(origin_input) or normalized_question == reversed_question:
        return False

    # The original answer must not be a palindrome either. The word-wise test
    # matters: a one-word answer would otherwise equal its own reversal.
    normalized_response = ' '.join(response.split())
    if is_palindrome(response) or normalized_response == reverse_en(response):
        return False

    # Ask the word-reversed question
    reversed_response = generate_response(reversed_question)
    if reversed_response is None:
        return False

    # The forward answer must be the word-reversed backward answer.
    # reverse_en() joins words with single spaces, hence the normalized form.
    return normalized_response == reverse_en(reversed_response)


def validate_reciprocal_question(input, response):
    """
    Validator: given question A and its answer B, check that asking B as a
    new question yields A as the answer.

    :param input: the original question A
    :param response: the model's answer B to question A, or None when no
        answer was obtained
    :return: True if B differs from A and asking B returns A (all compared
        ignoring surrounding whitespace), otherwise False
    """
    # generate_response() returns None when the API call fails.
    if response is None:
        return False

    question = input.strip()

    # Answer B must differ from question A. Compare the stripped forms so this
    # check uses the same notion of equality as the final comparison below.
    if response.strip() == question:
        return False

    # Ask B as the new question
    new_response = generate_response(response)
    if new_response is None:
        return False

    # The new answer must equal the original question A
    return new_response.strip() == question


# Chinese-language challenge set: a list of chapters, each with a 'name' and
# a list of 'problems' ('title', 'description', and a 'validator' callable
# taking (response, input)).
# NOTE(review): the name `challenges` is assigned again right after this
# list, so this value is replaced at import time - confirm whether this
# list is still meant to be kept.
challenges = [
    {
        'name':
        '第一章 对话之趣',
        'problems': [
            {
                'title': '第1题 初来乍到',
                'description': '请你构造一个问题使模型的回答是一字不差的“1+1=3”(不需要引号)。',
                'validator':
                lambda response, input: response.strip() == '1+1=3'
            },
            {
                'title':
                '第2题 小试牛刀',
                'description':
                '请输入三个字以内的问题,使模型的回答在30个字以上。',
                'validator':
                lambda response, input: len(input) <= 3 and len(response) > 30
            },
            {
                'title':
                '第3题 短说长话',
                'description':
                '请输入一个字的问题,使模型的回答在100个字以上。',
                'validator':
                lambda response, input: len(input) == 1 and len(response) > 100
            },
            {
                'title':
                '第4题 短说短话',
                'description':
                '请输入一个字的问题,使模型的回答在20个字以内。',
                'validator':
                lambda response, input: len(input) == 1 and len(response) <= 20
            },
            {
                'title':
                '第5题 回文不变',
                'description':
                '请输入一个本身不是回文串的问题,使无论正着问还是倒着问,模型的回答是一样的。',
                # The validator depends on the model's behaviour; this is only a placeholder
                'validator':
                lambda response, input: validate_palindrome_invariance(
                    input, response)
            },
            {
                'title':
                '第6题 无中生狗',
                'description':
                '请提一个不包含“狗”这个字的问题,但是回答中至少出现3次“狗”这个字。',
                'validator':
                lambda response, input: '狗' not in input and response.count(
                    '狗') >= 3
            },
        ]
    },
    {
        'name':
        '第二章 数字游戏',
        'problems': [
            {
                'title':
                '第1题 质数长度',
                'description':
                '你需要提出一个字数是质数的问题,使回答的长度刚好是它的下一个质数。',
                'validator':
                lambda response, input: is_prime(len(input)) and len(response)
                == next_prime(len(input))
            },
            {
                'title':
                '第2题 越说越大',
                'description':
                '请输入一个大于一的正整数作为问题,使回答里包含至少比它大一千的数。',
                'validator':
                lambda response, input:
                (input.isdigit() and int(input) > 1 and any(
                    int(num) > int(input) + 1000
                    for num in re.findall(r'\d+', response)))
            },
            {
                'title':
                '第3题 越说越小',
                'description':
                '请输入一个大于一的正整数作为问题,使回答里包含至少10个不同的至少比它小一千的自然数。',
                'validator':
                lambda response, input:
                (input.isdigit() and int(input) > 1 and (len({
                    int(num)
                    for num in re.findall(r'\d+', response)
                    if int(num) < int(input) - 1000
                }) >= 10))
            },
            {
                'title':
                '第4题 警世名言',
                'description':
                '请在不提及1,4,5这三个字符的情况下让模型回答114514,不要有多余的字符。',
                'validator':
                lambda response, input: response.strip() == '114514' and all(
                    c not in input for c in ['1', '4', '5'])
            },
            {
                'title':
                '第5题 开平方数',
                'description':
                '请输入一个大于一的平方数作为问题,使回答里包含它的平方根对应的数。',
                'validator':
                lambda response, input:
                (input.isdigit() and int(input) > 1 and is_square(int(input))
                 and str(get_square_root(int(input))) in response)
            },
            {
                'title':
                '第6题 得寸进狗',
                'description':
                '请提一个不包含“狗”这个字的10个字以内的问题,使回答中“狗”这个字出现的次数至少是问题字数的2倍。',
                'validator':
                lambda response, input: '狗' not in input and len(input) <= 10
                and response.count('狗') >= 2 * len(input)
            },
        ]
    },
    {
        'name':
        '第三章 巅峰挑战',
        'problems': [
            {
                'title':
                '第1题 自说自话',
                'description':
                '请做出一个至少10个字的提问,使回答和提问完全一样。',
                'validator':
                lambda response, input: input == response and len(input) >= 10
            },
            {
                'title':
                '第2题 话自说自',
                'description':
                '请做出一个至少10个字的提问,使回答是提问的逆序。',
                'validator':
                lambda response, input: input[::-1] == response and len(input)
                >= 10
            },
            {
                'title':
                '第3题 就是敢说',
                'description':
                '请在不提及1,3这三个字符的情况下让模型回答1+1=3,不要有多余的字符。',
                'validator':
                lambda response, input: response.strip() == '1+1=3' and all(
                    c not in input for c in ['1', '3'])
            },
            {
                'title':
                '第4题 回文协变',
                'description':
                '请输入一个本身不是回文串的问题,使正着问和倒着问的时候,模型的回答本身不回文且也是逆序的关系。',
                'validator':
                lambda response, input: validate_palindrome_inverse(
                    input, response)
            },
        ]
    },
    {
        'name':
        '第四章 无人之境',
        'problems': [
            {
                'title':
                '第1题 循环往复',
                'description':
                '请构造一个问题A,它的回复是不同于A的B,然后以B提问能再次得到A.',
                'validator':
                lambda response, input: validate_reciprocal_question(
                    input, response)
            },
            {
                'title':
                '第2题 惜字如金',
                'description':
                '请输入一个字的问题,使模型的回答在16个字以内。',
                'validator':
                lambda response, input: len(input) == 1 and len(response) <= 16
            },
            {
                'title':
                '第3题 自然之密',
                'description':
                '请输入一个大于一的正整数作为问题,使回答里包含和它刚好相差1的数。',
                'validator':
                lambda response, input:
                (input.isdigit() and int(input) > 1 and any(
                    abs(int(num) - int(input)) == 1
                    for num in re.findall(r'\d+', response)))
            },
        ]
    }
]

# English-language challenge set used by the app.
#
# Structure: a list of chapters; each chapter has a "name" and a list of
# "problems". Each problem has a "title", a "description" shown to the user,
# and a "validator" called as validator(response, input) that returns a truthy
# value when the model's response solves the problem for the given input.
# Lengths are counted in whitespace-separated words.
challenges = [
    {
        "name": "Chapter 1. Joy of Conversations",
        "problems": [
            {
                "title": "Question 1. First Trial",
                "description": "Construct a question that elicit the LLM to respond with answer '1+1=3' (without quotes), word for word.",
                "validator": lambda response, input: response.strip() == "1+1=3"
            },
            {
                "title": "Question 2. Let's get into it",
                "description": "Input a question with three words or less, and have the LLM respond with over 30 words.",
                "validator": lambda response, input: len(input.split()) <= 3 and len(response.split()) > 30
            },
            {
                "title": "Question 3. Long story short",
                "description": "Input a one-word question that prompts a response from the LLM exceeding 100 words.",
                "validator": lambda response, input: len(input.split()) == 1 and len(response.split()) > 100
            },
            {
                "title": "Question 4. Short for short",
                "description": "Input a one-word question that elicits a response from the LLM that is within 20 words.",
                "validator": lambda response, input: len(input.split()) == 1 and len(response.split()) <= 20
            },
            {
                "title": "Question 5. Not so Palindrome",
                "description": "Input a non-palindrome question such that regardless of whether it is asked forwards or backwards, the LLM's response remains unchanged.",
                "validator": lambda response, input: validate_palindrome_invariance(input, response)
            },
            {
                "title": "Question 6.Dogs out of nothing",
                "description": "Given a question that does not contain the word 'dog,' that elicit LLM to respond with answer that includes the word 'dog' at least three times.",
                # Case-insensitive on both sides: "Dog" in the question must not
                # slip through, and "Dog"/"DOG" in the answer must be counted.
                "validator": lambda response, input: "dog" not in input.lower() and response.lower().count("dog") >= 3
            },
        ]
    },
    {
        "name": "Chapter 2. Game of Numbers",
        "problems": [
            {
                "title": "Question 1. Length of Prime",
                "description": "Craft a question with word count of a prime number, that prompts a response whose word count is exactly its succeeding prime number.",
                "validator": lambda response, input: is_prime(len(input.split())) and len(response.split()) == next_prime(len(input.split()))
            },
            {
                "title": "Question 2.  Larger as we speak",
                "description": "Provide a positive integer greater than one as your question such that the response contains a number at least 1000 larger than the input number.",
                "validator": lambda response, input: input.isdigit() and int(input) > 1 and any(int(num) > int(input) + 1000 for num in re.findall(r'\d+', response))
            },
            {
                "title": "Question 3.  Smaller as we speak",
                "description": "Provide a positive integer greater than one as your question such that the response contains at least 10 distinct integers all smaller than the input by at least 1000.",
                "validator": lambda response, input: input.isdigit() and int(input) > 1 and (len({int(num) for num in re.findall(r'\d+', response) if int(num) < int(input) - 1000}) >= 10)
            },
            {
                "title": "Question 4.  Words of Proverbs",
                "description": "Prompt the LLM to output string '114514' exactly, without mentioning the characters or digits 1, 4, or 5.",
                "validator": lambda response, input: response.strip() == "114514" and all(c not in input for c in ["1", "4", "5"])
            },
            {
                "title": "Question 5.  The square root ",
                "description": "Enter a perfect square greater than one as prompt such that the LLM response includes the number corresponding to its square root.",
                "validator": lambda response, input: input.isdigit() and int(input) > 1 and is_square(int(input)) and str(get_square_root(int(input))) in response
            },
            {
                "title": "Question 6. Dog alert",
                "description": "Craft a prompt at most 10 words long without the word 'dog', yet prompts a response where the word 'dog' appears at least twice the word-length of the question.",
                # At least one word is required: with an empty prompt the
                # target count would be 0 and any response would pass.
                "validator": lambda response, input: "dog" not in input.lower() and 1 <= len(input.split()) <= 10 and response.lower().count("dog") >= 2 * len(input.split())
            },
        ]
    },
    {
        "name": "Chapter 3. Top Challenges",
        "problems": [
            {
                "title": "Question 1. Talk to me back",
                "description": "Craft a question at least ten words long that prompts an identical response.",
                "validator": lambda response, input: input == response and len(input.split()) >= 10
            },
            {
                "title": "Question 2. Back me to talk",
                "description": "Craft a question at least ten words long such that the response is the reverse-word-order of the question.",
                # Word-order reversal, as the description states (not a
                # character-by-character reversal). reverse_en() joins words
                # with single spaces, so the response is normalized likewise.
                "validator": lambda response, input: " ".join(response.split()) == reverse_en(input) and len(input.split()) >= 10
            },
            {
                "title": "Question 3. Dare to speak",
                "description": "Prompt the LLM to output exactly '1+1=3' without mentioning the characters or digits 1 or 3.",
                "validator": lambda response, input: response.strip() == "1+1=3" and all(c not in input for c in ["1", "3"])
            },
            {
                "title": "Question 4.  Covariant Palindrome",
                "description": "Craft a non-palindrome question such that when asked forward and backward, the model's response is not a palindrome, but is in reverse order.",
                "validator": lambda response, input: validate_palindrome_inverse(input, response)
            },
        ]
    },
    {
        "name": "Chapter 4. No man's land",
        "problems": [
            {
                "title": "Question 1. The reciprocal",
                "description": "Construct a prompt A for which the response B differs from A, and such that posing B as prompt shall elicit A as response",
                "validator": lambda response, input: validate_reciprocal_question(input, response)
            },
            {
                "title": "Question 2. Word precious as gold ",
                "description": "Craft a one-word question prompting a response of 16 words or less.",
                "validator": lambda response, input: len(input.split()) == 1 and len(response.split()) <= 16
            },
            {
                "title": "Question 3. Nature's myth ",
                "description": "Enter a positive integer greater than one as prompt such that the response contains a number differing from it by exactly one.",
                "validator": lambda response, input: input.isdigit() and int(input) > 1 and any(abs(int(num) - int(input)) == 1 for num in re.findall(r'\d+', response))
            },
        ]
    }
]

def test_valid():
    """Smoke-test every validator in ``challenges`` with placeholder arguments.

    Each validator is called as ``validator('response', 'input')``. When a
    call raises, the traceback and the offending problem entry are printed and
    the loop moves on, so every problem is exercised in a single run.
    Validators that call ``generate_response`` will contact the model.
    """
    import traceback

    for challenge in challenges:
        for p in challenge['problems']:
            val_fn = p['validator']
            try:
                val_fn('response', 'input')
            except Exception:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt and SystemExit still propagate.
                traceback.print_exc()
                print(p, 'failed')

def get_problem(challenge_idx, problem_idx):
    """Look up one problem entry by chapter index and problem index."""
    return challenges[challenge_idx]['problems'][problem_idx]


def update_challenge_info(current_chapter_index, current_challenge_index):
    """Return the description text of the selected problem."""
    problem = get_problem(current_chapter_index, current_challenge_index)
    return problem['description']


def update_question_info(current_chapter_index, current_challenge_index):
    """Build the centred heading markup with the chapter name and problem title."""
    chapter_name = challenges[current_chapter_index]["name"]
    problem_title = get_problem(current_chapter_index,
                                current_challenge_index)["title"]
    heading_parts = [
        f"""\n<center><font size=4>{chapter_name}""",
        f"""</center>\n\n <center><font size=3>{problem_title}</center>""",
    ]
    return ''.join(heading_parts)


def validate_challenge(response, input, state):
    """Check the model's answer against the current problem and advance on success.

    :param response: the model's answer, or None when no answer was obtained
    :param input: the question the user submitted
    :param state: dict holding 'current_chapter_index' and
        'current_challenge_index'; it is updated in place
    :return: tuple ``(result message, question heading, challenge description)``
        where the heading and description belong to the problem that is
        current after validation
    """
    print('in validate_challenge')
    assert 'current_chapter_index' in state, 'current_chapter_index not found in state'
    assert 'current_challenge_index' in state, 'current_challenge_index not found in state'
    current_chapter_index = state['current_chapter_index']
    current_challenge_index = state['current_challenge_index']
    # Current chapter
    current_chapter = challenges[current_chapter_index]
    # Current challenge
    challenge = current_chapter['problems'][current_challenge_index]

    if response is None:
        # generate_response() returns None when the API call fails. The
        # validators call str methods on the response, so report the failure
        # here instead of letting them raise, and stay on the same problem.
        challenge_result = 'Network error, please retry'
    elif challenge['validator'](response, input):
        challenge_result = 'Challenge successful! Proceed to the next level.'
        # Are there more challenges left in the current chapter?
        if current_challenge_index < len(current_chapter['problems']) - 1:
            # Move to the next challenge of the current chapter
            current_challenge_index += 1
        else:
            # The chapter is finished: move to the next chapter
            current_challenge_index = 0
            if current_chapter_index < len(challenges) - 1:
                current_chapter_index += 1
            else:
                challenge_result = 'All Challenges Completed!'
    else:
        challenge_result = 'challenge failed, please retry'
    state['current_chapter_index'] = current_chapter_index
    state['current_challenge_index'] = current_challenge_index
    print('update state: ', state)

    return challenge_result, \
        update_question_info(current_chapter_index, current_challenge_index), \
        update_challenge_info(current_chapter_index, current_challenge_index)


def generate_response(input):
    """Send ``input`` to the 'qwen-max' model through DashScope.

    :param input: the user's question, sent as the single user message after
        a fixed system message
    :return: the message content of the first choice when the call reports
        HTTP OK; otherwise the request id and message are printed and None
        is returned
    """
    system_turn = {
        'role': 'system',
        'content': """You are a helpful assistant."""
    }
    user_turn = {'role': 'user', 'content': input}
    response = dashscope.Generation.call(
        model='qwen-max',
        messages=[system_turn, user_turn],
        # set the random seed, optional, default to 1234 if not set
        seed=random.randint(1, 10000),
        result_format='message',  # set the result to be "message" format.
        top_p=0.8)
    if response.status_code != HTTPStatus.OK:
        print(response.request_id, response.message)
        print('Network error, please retry')
        return None
    return response.output.choices[0].message.content


def on_submit(input, state):
    """Handle a submission: query the model, validate the answer, report back.

    :param input: the question typed by the user
    :param state: the per-session progress dict passed on to validate_challenge
    :return: tuple ``(challenge result, chat history, question heading,
        challenge description)``
    """
    response = generate_response(input)
    history = [(input, response)]
    print(history)
    outcome = validate_challenge(response, input, state)
    challenge_result, question_info, challenge_info = outcome
    print('validate_challenge done')
    return (challenge_result, history, question_info, challenge_info)


# Build the Gradio interface
block = gr.Blocks()

with block as demo:
    # Per-session progress; on_submit passes it to validate_challenge, which
    # updates it in place.
    state = gr.State(dict(current_challenge_index=0, current_chapter_index=0))
    # Indices used only to render the initial heading and description.
    current_chapter_index = 0
    current_challenge_index = 0
    gr.Markdown("""<center><font size=6>Darn! Ambushed by LLMs!</center>""")
    gr.Markdown("""<font size=3>Welcome to the LLM Riddles Replica Edition, [Thank Haoqiang Fan's idea](https://zhuanlan.zhihu.com/p/665393240): Darn! Ambushed by LLMs!

Through this game, you will gain a deeper understanding of large language models.

In this game, you need to construct a question to ask a large language model, so that its response meets the specified requirements.""")
    question_info = gr.Markdown(
        update_question_info(current_chapter_index, current_challenge_index))
    # `interactive=False` is the Gradio way to make a Textbox read-only;
    # `disabled` is not a Textbox parameter.
    challenge_info = gr.Textbox(
        value=update_challenge_info(current_chapter_index,
                                    current_challenge_index),
        label='Current Challenge',
        interactive=False)
    challenge_result = gr.Textbox(label='Challenge Result', interactive=False)
    # `lines` is not a Chatbot parameter, so it is not passed here.
    chatbot = gr.Chatbot(label='Qwen-max', elem_classes='control-height')
    message = gr.Textbox(lines=2, label='Input')

    with gr.Row():
        submit = gr.Button('🚀 Send')

    submit.click(
        on_submit,
        inputs=[message, state],
        outputs=[challenge_result, chatbot, question_info, challenge_info])
    gr.HTML("""
<div style="text-align: center;">
  <span>
    Powered by <a href="https://github.com/QwenLM/" target="_blank">
    <img src=
    "//qianwen-res.oss-cn-beijing.aliyuncs.com/logo_qwen.jpg"
    style="display: inline; height: 20px; vertical-align: bottom;"/>Qwen
    </a>
  </span>
</div>
""")
demo.queue(concurrency_count=10).launch(height=800, share=False)