File size: 3,032 Bytes
658dbbd
 
2e9ea78
 
 
 
 
d8529e7
2e9ea78
658dbbd
2e9ea78
658dbbd
2e9ea78
 
 
 
d8529e7
2e9ea78
d8529e7
658dbbd
d8529e7
658dbbd
2e9ea78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658dbbd
 
 
2e9ea78
 
658dbbd
 
 
 
 
 
53ce00a
2e9ea78
658dbbd
11133ef
2e9ea78
 
11133ef
0c2babc
2e9ea78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
981bd24
658dbbd
 
 
11133ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import time
import random
import re

from flask import session
import pandas as pd

from ice_breaking_challenge import model, quiz_generated, quiz_results
from ice_breaking_challenge.google_sheets import QUESTIONS

def background_task(sid, qna: pd.DataFrame, team_size: int):
    """Generate quizzes for session ``sid`` and publish them in the shared registries.

    The original docstring describes this as generating quizzes with the Gemma
    model from the collected answers. The work itself is delegated to
    ``generate_quiz``; this function only records progress and the result.

    Args:
        sid: Key under which progress and results are stored in
            ``quiz_generated`` and ``quiz_results``.
        qna: Answers passed straight through to ``generate_quiz``.
        team_size: Team size passed straight through to ``generate_quiz``.

    Side effects:
        Sets ``quiz_generated[sid]`` to ``False`` before generation starts,
        stores the quizzes in ``quiz_results[sid]``, then sets
        ``quiz_generated[sid]`` to ``True``. Prints the elapsed seconds.

    NOTE(review): if ``generate_quiz`` raises, ``quiz_generated[sid]`` stays
    ``False``; presumably a caller waits on that flag -- confirm how failures
    should be reported.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    # The shared dicts are only mutated (never rebound), so no ``global``
    # declaration is required.
    quiz_generated[sid] = False
    quiz_results[sid] = generate_quiz(qna, team_size)
    # Quiz generation finished: publish the flag only after the results are stored.
    quiz_generated[sid] = True

    print(time.perf_counter() - start_time)


def sample_quizzes(data: pd.DataFrame, team_size: int):
    """Pick random (question, answer, name) triples from the answer table.

    For every row of ``data`` a random subset of the question columns
    ``Q1``..``Q5`` is drawn. Each drawn column yields one
    ``[question, answer, name]`` entry, where the question text comes from the
    first row of ``QUESTIONS`` and the answer and name come from that row of
    ``data``.

    Rows are addressed by position, so the result does not depend on the
    DataFrame's index labels (non-default or duplicated labels are fine).

    Args:
        data: Table with a ``name`` column and the columns ``Q1``..``Q5``.
        team_size: Number of team members; the number of questions drawn per
            row is ``min(10 // team_size + 1, 5)``.

    Returns:
        A list of ``[question, answer, name]`` lists, currently truncated to
        the first two entries (the truncation is marked TODO below).

    Raises:
        ZeroDivisionError: If ``team_size`` is 0.
    """
    question_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
    questions_size = min(10 // team_size + 1, 5)

    sampled = []
    for position in range(len(data)):
        row = data.iloc[position]
        # One random draw per row, in row order.
        for question_number in random.sample(question_columns, questions_size):
            question = QUESTIONS.iloc[0][question_number]
            answer = row[question_number]
            name = row['name']
            sampled.append([question, answer, name])

    return sampled[:2]  # TODO


# Matches a line that starts with an opening tag: ``<`` followed by anything
# except ``/``.
_OPENING_TAG_RE = re.compile(r'^<[^/]')
# Tag-stripping pattern: ``<``, any single character, one or more word
# characters, ``>``.
_TAG_RE = re.compile(r'<.\w+>')


def _extract_quiz_lines(response):
    """Return the cleaned, non-empty lines that follow the second opening-tag line.

    Everything up to and including the second line that starts with an opening
    tag is discarded. Presumably the first such line is the echoed
    ``<instruction>`` of the prompt and the second marks the start of the
    generated quiz -- confirm against the model's actual output format.

    Returns an empty list when the response has fewer than two such lines.
    """
    lines = [line.strip() for line in response.split('\n')]

    opening_tags_seen = 0
    second_tag_index = None
    for i, line in enumerate(lines):
        if _OPENING_TAG_RE.search(line):
            opening_tags_seen += 1
            if opening_tags_seen == 2:
                second_tag_index = i
                break

    if second_tag_index is None:
        return []

    stripped = (_TAG_RE.sub("", line).strip() for line in lines[second_tag_index + 1:])
    return [line for line in stripped if line]


def generate_quiz(data: pd.DataFrame, team_size: int):
    """Turn sampled question/answer pairs into multiple-choice quizzes via ``model``.

    For each ``[question, answer, name]`` entry from ``sample_quizzes`` a
    prompt is built and sent to ``model.generate``. The first three cleaned
    lines of the response are taken as the generated question, the answer
    options, and the correct answer, in that order.

    A response that does not yield at least three lines is skipped with a
    printed note, so one malformed generation does not abort the whole batch.

    Args:
        data: Answer table forwarded to ``sample_quizzes``.
        team_size: Team size forwarded to ``sample_quizzes``.

    Returns:
        A list of ``[name, question, choices, answer]`` lists, one per
        successfully parsed model response.
    """
    generated_quizzes = []

    selected_data = sample_quizzes(data, team_size)

    template_input="""
        <instruction>
        Using the text: {question} {answer}, create a new multiple-choice question with 4 answer options.
        """
    for question, answer, name in selected_data:
        input_text = template_input.format(question=question, answer=answer)

        response = model.generate(input_text, max_length=512)
        print(response)

        lines = _extract_quiz_lines(response)
        print(lines)
        print("==========================================")

        if len(lines) < 3:
            # Not enough content to form question / choices / answer.
            print(f"Skipping unparseable model response for {name!r}")
            continue

        question_generated, multiple_choice_generated, answer_generated = lines[:3]

        print(question_generated, multiple_choice_generated, answer_generated)
        print("==========================================")
        generated_quizzes.append([name, question_generated, multiple_choice_generated, answer_generated])

    return generated_quizzes