File size: 3,486 Bytes
658dbbd
 
5e60e06
 
 
 
 
d8529e7
5e60e06
658dbbd
5e60e06
658dbbd
5e60e06
 
 
a40292e
d8529e7
5e60e06
d8529e7
658dbbd
d8529e7
658dbbd
5e60e06
 
 
 
 
aecb384
5e60e06
 
 
 
aecb384
 
5e60e06
aecb384
 
 
 
 
 
5e60e06
 
 
aecb384
5e60e06
aecb384
 
5e60e06
 
 
658dbbd
 
 
5e60e06
 
658dbbd
 
 
 
 
 
53ce00a
5e60e06
658dbbd
11133ef
5e60e06
 
11133ef
0c2babc
5e60e06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a40292e
 
 
 
5e60e06
 
 
 
981bd24
658dbbd
 
 
11133ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import time
import random
import re

from flask import session
import pandas as pd

from ice_breaking_challenge import model, quiz_generated, quiz_results
from ice_breaking_challenge.google_sheets import QUESTIONS

def background_task(sid, qna: pd.DataFrame, team_size: int):
    """Generate quizzes with the Gemma model from the collected answers.

    The result is published through the shared ``quiz_results`` and
    ``quiz_generated`` mappings, both keyed by ``sid``:
    ``quiz_generated[sid]`` is set to ``False`` before generation starts and
    to ``True`` only after ``quiz_results[sid]`` has been stored.

    Args:
        sid: Key under which the status flag and the quizzes are stored.
        qna: Answers passed through to ``generate_quiz``.
        team_size: Team size passed through to ``generate_quiz``.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments while the model is running.
    start_time = time.perf_counter()

    # Only item assignment is performed on the shared mappings, so no
    # ``global`` declaration is needed.
    quiz_generated[sid] = False
    quiz_results[sid] = generate_quiz(qna, team_size)
    # Quiz generation finished.
    quiz_generated[sid] = True

    # Elapsed generation time in seconds.
    print(time.perf_counter() - start_time)


def sample_quizzes(data: pd.DataFrame, team_size: int):
    """Pick up to 10 (question, answer, name) triples to build quizzes from.

    For every row of ``data`` a random subset of the columns ``Q1``..``Q5`` is
    drawn. The first drawn question of each row is "mandatory", so every row
    contributes at least one entry ahead of the others; the remaining draws
    are pooled, shuffled and appended. The combined list is truncated to 10.

    Args:
        data: One row per participant, with columns ``Q1``..``Q5`` holding the
            answers and a ``name`` column.
        team_size: Number of participants; determines how many questions are
            drawn per row (ceil(10 / team_size), capped at 5).

    Returns:
        A list of ``[question, answer, name]`` lists, at most 10 long. The
        question text is read from the first row of ``QUESTIONS``.

    Raises:
        ValueError: If ``team_size`` is smaller than 1.
    """
    if team_size < 1:
        raise ValueError(f"team_size must be a positive integer, got {team_size}")

    question_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
    max_quizzes = 10
    # Ceiling division without floats, capped at the number of question columns.
    questions_per_member = min(-(-max_quizzes // team_size), len(question_columns))

    if len(data) == 0:
        return []

    question_texts = QUESTIONS.iloc[0]

    mandatory_quizzes = []
    option_quizzes = []
    # Rows are addressed by position: index labels are not guaranteed to be
    # 0..n-1 (e.g. after filtering), so they must not be fed into ``iloc``.
    for position in range(len(data)):
        member = data.iloc[position]
        name = member['name']
        drawn = random.sample(question_columns, questions_per_member)

        first, *rest = drawn
        mandatory_quizzes.append([question_texts[first], member[first], name])
        for question_number in rest:
            option_quizzes.append(
                [question_texts[question_number], member[question_number], name]
            )

    random.shuffle(option_quizzes)
    return (mandatory_quizzes + option_quizzes)[:max_quizzes]  # TODO


# A line that starts with an opening tag such as "<question>" (not "</question>").
_OPENING_TAG_RE = re.compile(r'^<[^/]')
# Tag-like markup removed from the response body (same pattern as before, now
# written as a raw string so "\w" is not an invalid string escape).
_MARKUP_RE = re.compile(r'<.\w+>')
# Numbering such as "1. " that separates the answer options on one line.
_CHOICE_NUMBER_RE = re.compile(r'\d+\.\s*')


def _parse_quiz_response(response: str):
    """Extract (question, choices, answer) from a raw model response.

    Everything up to and including the line carrying the second opening tag is
    discarded, tag markup is stripped, and the first three non-empty lines are
    read as the question, the numbered choices and the answer.

    Returns:
        A ``(question, choices, answer)`` tuple, or ``None`` when the response
        has fewer than two opening tags or fewer than three content lines.
    """
    lines = [line.strip() for line in response.split('\n')]

    opening_tags_seen = 0
    body_start = None
    for i, line in enumerate(lines):
        if _OPENING_TAG_RE.search(line):
            opening_tags_seen += 1
            if opening_tags_seen == 2:
                body_start = i + 1
                break
    if body_start is None:
        return None

    content = [_MARKUP_RE.sub("", line).strip() for line in lines[body_start:]]
    content = [line for line in content if line]

    print(content)
    print("==========================================")

    if len(content) < 3:
        return None

    question_generated, choices_line, answer_generated = content[:3]
    choices = [token.strip() for token in _CHOICE_NUMBER_RE.split(choices_line)]
    # Drop empty pieces (e.g. the text before the first "1.").
    choices = [choice for choice in choices if choice]
    return question_generated, choices, answer_generated


def generate_quiz(data: pd.DataFrame, team_size: int):
    """Generate multiple-choice quizzes from participants' answers.

    Question/answer pairs are chosen by ``sample_quizzes``; each pair is put
    into a prompt, sent to ``model.generate`` and the response is parsed.
    Responses that do not have the expected layout are reported and skipped
    instead of aborting the whole batch.

    Args:
        data: Participants' answers, forwarded to ``sample_quizzes``.
        team_size: Number of participants, forwarded to ``sample_quizzes``.

    Returns:
        A list of ``[name, question, choices, answer]`` lists, one per
        successfully parsed response, where ``choices`` is a list of strings.
    """
    generated_quizzes = []

    selected_data = sample_quizzes(data, team_size)

    template_input="""
        <instruction>
        Using the text: {question} {answer}, create a new multiple-choice question with 4 answer options.
        """
    for question, answer, name in selected_data:
        input_text = template_input.format(question=question, answer=answer)

        response = model.generate(input_text, max_length=512)
        print(response)

        parsed = _parse_quiz_response(response)
        if parsed is None:
            # Model output is free-form text; a single malformed response
            # should not discard the quizzes that were generated correctly.
            print("Skipping malformed quiz response")
            continue

        question_generated, multiple_choice_generated, answer_generated = parsed
        print(question_generated, multiple_choice_generated, answer_generated)
        print("==========================================")
        generated_quizzes.append([name, question_generated, multiple_choice_generated, answer_generated])

    return generated_quizzes