File size: 3,032 Bytes
658dbbd
 
5e60e06
 
 
 
 
d8529e7
5e60e06
658dbbd
5e60e06
658dbbd
5e60e06
 
 
 
d8529e7
5e60e06
d8529e7
658dbbd
d8529e7
658dbbd
5e60e06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658dbbd
 
 
5e60e06
 
658dbbd
 
 
 
 
 
53ce00a
5e60e06
658dbbd
11133ef
5e60e06
 
11133ef
0c2babc
5e60e06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
981bd24
658dbbd
 
 
11133ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import time
import random
import re

from flask import session
import pandas as pd

from ice_breaking_challenge import model, quiz_generated, quiz_results
from ice_breaking_challenge.google_sheets import QUESTIONS

def background_task(sid, qna: pd.DataFrame, team_size: int):
    """Generate quizzes with the model and publish them under ``sid``.

    Sets ``quiz_generated[sid]`` to ``False``, runs ``generate_quiz`` on
    ``qna``, stores its return value in ``quiz_results[sid]`` and finally sets
    ``quiz_generated[sid]`` to ``True``. The elapsed time in seconds is printed
    to stdout.

    Args:
        sid: Key under which the status flag and the results are stored
            (presumably a session id -- confirm against the caller).
        qna: Answers table, passed unchanged to ``generate_quiz``.
        team_size: Team size, passed unchanged to ``generate_quiz``.

    Note:
        If ``generate_quiz`` raises, the exception propagates and
        ``quiz_generated[sid]`` is left at ``False``.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()

    quiz_generated[sid] = False
    quiz_results[sid] = generate_quiz(qna, team_size)
    # Quiz generation is complete.
    quiz_generated[sid] = True

    print(time.perf_counter() - started)


def sample_quizzes(data: pd.DataFrame, team_size: int, questions: "pd.DataFrame | None" = None):
    """Randomly pick ``[question, answer, name]`` triples from the answers table.

    For every row of ``data`` a random subset of the question columns
    ``Q1``..``Q5`` is drawn; each drawn column yields one triple made of the
    question text, that row's answer and that row's ``name`` value.

    Args:
        data: One row per participant with a ``name`` column and the answer
            columns ``Q1``..``Q5``. Any index is accepted.
        team_size: Used to size the per-row sample as
            ``min(10 // team_size + 1, 5)``, so smaller teams get more
            questions per member. ``0`` raises ``ZeroDivisionError``.
        questions: Table whose first row holds the question text for
            ``Q1``..``Q5``. Defaults to the module-level ``QUESTIONS``.

    Returns:
        A list of ``[question, answer, name]`` lists, currently truncated to
        the first two entries.
    """
    if questions is None:
        questions = QUESTIONS

    question_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
    questions_per_member = min(10 // team_size + 1, len(question_columns))
    question_texts = questions.iloc[0]

    sampled = []
    # Walk the rows themselves rather than feeding index labels to ``iloc``:
    # labels only coincide with positions for a default RangeIndex, so a
    # filtered or re-indexed frame would hit the wrong row or raise IndexError.
    for _, member in data.iterrows():
        for column in random.sample(question_columns, questions_per_member):
            sampled.append([question_texts[column], member[column], member['name']])

    return sampled[:2]  # TODO: only the first two quizzes are returned for now


# A line that starts with an opening tag such as ``<instruction>`` (not ``</...>``).
_OPENING_TAG_RE = re.compile(r'^<[^/]')
# Same pattern the parser has always used to strip tags from a line.
_INLINE_TAG_RE = re.compile(r'<.\w+>')


def _extract_generated_lines(response: str) -> list:
    """Return the non-empty, tag-stripped lines after the second opening-tag line.

    The first opening-tag line is expected to be the echoed ``<instruction>``
    prompt; everything up to and including the second opening-tag line is
    discarded. Returns an empty list when no second opening-tag line exists.
    """
    lines = [line.strip() for line in response.split('\n')]

    opening_tags_seen = 0
    body_start = None
    for index, line in enumerate(lines):
        if _OPENING_TAG_RE.search(line):
            opening_tags_seen += 1
            if opening_tags_seen == 2:
                body_start = index + 1
                break

    if body_start is None:
        return []

    stripped = (_INLINE_TAG_RE.sub("", line).strip() for line in lines[body_start:])
    return [line for line in stripped if line]


def generate_quiz(data: pd.DataFrame, team_size: int):
    """Turn sampled question/answer pairs into multiple-choice quizzes.

    Each triple returned by ``sample_quizzes(data, team_size)`` is formatted
    into a prompt and sent to ``model.generate``. The first three content
    lines of the parsed response are taken as the question, the choices and
    the answer, in that order. Raw and parsed responses are printed to stdout.

    Args:
        data: Answers table, passed unchanged to ``sample_quizzes``.
        team_size: Team size, passed unchanged to ``sample_quizzes``.

    Returns:
        A list of ``[name, question, choices, answer]`` lists. Responses that
        do not contain at least three content lines after the second
        opening-tag line are skipped instead of aborting the whole run.
    """
    generated_quizzes = []

    selected_data = sample_quizzes(data, team_size)

    template_input = """
        <instruction>
        Using the text: {question} {answer}, create a new multiple-choice question with 4 answer options.
        """
    for question, answer, name in selected_data:
        input_text = template_input.format(question=question, answer=answer)

        # NOTE(review): assumes model.generate returns a single str -- confirm.
        response = model.generate(input_text, max_length=512)
        print(response)

        lines = _extract_generated_lines(response)
        print(lines)
        print("==========================================")

        if len(lines) < 3:
            # A malformed generation used to raise TypeError/IndexError here and
            # discard every quiz already produced; skip just this one instead.
            print(f"Skipping malformed model response for {name!r}")
            continue

        question_generated, multiple_choice_generated, answer_generated = lines[:3]

        print(question_generated, multiple_choice_generated, answer_generated)
        print("==========================================")
        generated_quizzes.append([name, question_generated, multiple_choice_generated, answer_generated])

    return generated_quizzes