import asyncio
import random
from collections import defaultdict
from typing import List, Tuple

from loguru import logger
from sqlalchemy.orm import Session
from tqdm import tqdm

from database import schema
from database.operation import *

# Memorize words: build word books, batch words into stories, and track the
# user's remember/forget actions to schedule review and recall batches.
from story_agent import generate_story_and_translated_story
from common.util import date_str, multiprocessing_mapping


def get_words_for_book(db: Session, user_book: UserBook) -> List[schema.Word]:
    book = get_book(db, user_book.book_id)
    if book is None:
        logger.warning(f"book not found: {user_book.book_id}")
        return []
    q = db.query(schema.Word).join(schema.Unit, schema.Unit.bv_voc_id == schema.Word.vc_id)
    words = q.filter(schema.Unit.bv_book_id == book.bk_id).order_by(schema.Word.vc_difficulty).all()
    return words
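
# Hedged usage sketch (SessionLocal is an assumed session factory, not defined
# in this file):
#
#     db = SessionLocal()
#     user_book = db.query(schema.UserBook).get(user_book_id)
#     words = get_words_for_book(db, user_book)  # ordered by vc_difficulty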


def save_words_as_book(db: Session, user_id: str, words: List[schema.Word], title: str):
    # The suffix means "(words to learn, auto-saved as a word book)".
    book = create_book(db, BookCreate(bk_name=f"{title}（待学单词自动保存为单词书）", bk_item_num=len(words), creator=user_id))
    for i, word in tqdm(enumerate(words), total=len(words)):
        unit = UnitCreate(bv_book_id=book.bk_id, bv_voc_id=word.vc_id)
        db_unit = schema.Unit(**unit.dict())
        db.add(db_unit)
        if i % 500 == 0:
            # Commit periodically so a long word list does not pile up in one
            # huge transaction.
            db.commit()
    db.commit()
    return book


def save_batch_words(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word]):
    batch_words_str_list = [word.vc_vocabulary for word in batch_words]
    # Only the first batch generates its story inline here. Later batches get
    # their stories based on the user's memorization state, generated 3 batches
    # in advance.
    story, translated_story = generate_story_and_translated_story(batch_words_str_list)
    return save_batch_words_with_story(db, i, user_book_id, batch_words, story, translated_story)


def save_batch_words_with_story(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word], story: str, translated_story: str):
    batch_words_str_list = [word.vc_vocabulary for word in batch_words]
    logger.info(f"{i}, {batch_words_str_list}\n{story}")
    user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
        user_book_id=user_book_id,
        story=story,
        translated_story=translated_story
    ))
    create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
        batch_id=user_memory_batch.id,
        story=story,
        translated_story=translated_story
    ))
    for word in batch_words:
        memory_word = UserMemoryWordCreate(
            batch_id=user_memory_batch.id,
            word_id=word.vc_id
        )
        db_memory_word = schema.UserMemoryWord(**memory_word.dict())
        db.add(db_memory_word)
    db.commit()
    return user_memory_batch
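
# Hedged usage sketch: attach a pre-generated story to a batch of words.
# generate_story_and_translated_story is the helper imported above; the word
# objects are whatever get_words_for_book returned.
#
#     story, translated = generate_story_and_translated_story(["apple", "banana"])
#     batch = save_batch_words_with_story(db, 0, user_book.id, words[:2], story, translated)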

async def async_save_batch_words(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word]):
    # Thin awaitable wrapper: save_batch_words itself is synchronous, so this
    # still blocks the event loop while the story is generated.
    save_batch_words(db, i, user_book_id, batch_words)


async def async_save_batch_words_list(db: Session, user_book_id: str, batch_words_list: List[List[schema.Word]]):
    # Fire-and-forget scheduling: the tasks are never awaited, so the caller
    # must keep the event loop alive until they finish. Currently unused (see
    # the commented-out call at the end of track()).
    for i, batch_words in enumerate(batch_words_list):
        asyncio.ensure_future(async_save_batch_words(db, i + 1, user_book_id, batch_words))
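
# Hedged sketch: if these async helpers are revived, gathering the coroutines
# (instead of ensure_future without an await) guarantees the work completes
# before the event loop shuts down:
#
#     await asyncio.gather(*(
#         async_save_batch_words(db, i + 1, user_book_id, batch)
#         for i, batch in enumerate(batch_words_list)
#     ))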

def transform(batch_words: List[str]):
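    # Worker for multiprocessing_mapping below: turns one batch of vocabulary
    # strings into its story and translation. Kept at module level, presumably
    # so it can be pickled for worker processes.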
    story, translated_story = generate_story_and_translated_story(batch_words)
    return {
        "story": story,
        "translated_story": translated_story,
        "words": batch_words
    }

def save_batch_words_list(db: Session, user_book_id: str, batch_words_list: List[List[schema.Word]]):
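    # Generate stories for all batches up front via multiprocessing_mapping
    # (which appears to checkpoint intermediate results to tmp_filepath), then
    # persist each batch with its story sequentially.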
    word_str_list = []
    for batch_words in batch_words_list:
        word_str_list.append([word.vc_vocabulary for word in batch_words])
    story_list = multiprocessing_mapping(transform, word_str_list, tmp_filepath=f"./output/logs/save_batch_words_list_{date_str}.xlsx")
    logger.info(f"story_list: {len(story_list)}")
    for i, (batch_words, story) in tqdm(enumerate(zip(batch_words_list, story_list))):
        save_batch_words_with_story(db, i, user_book_id, batch_words, story['story'], story['translated_story'])

def track(db: Session, user_book: schema.UserBook, words: List[schema.Word]):
    batch_size = user_book.batch_size
    logger.debug(f"{[w.vc_vocabulary for w in words]}")
    logger.debug(f"batch_size: {batch_size}")
    logger.debug(f"words count: {len(words)}")
    if user_book.random:
        random.shuffle(words)
    else:
        # Sort by word frequency: higher-frequency words are easier to remember.
        words.sort(key=lambda x: x.vc_frequency, reverse=True)
    logger.debug("saving words as book")
    save_words_as_book(db, user_book.owner_id, words, user_book.title)
    logger.debug(f"saved words as book [{user_book.title}]")
    # Split the words into fixed-size batches.
    batch_words_list = []
    for i in range(0, len(words), batch_size):
        batch_words_list.append(words[i:i + batch_size])
    logger.debug(f"batch_words_list: {len(batch_words_list)}")
    if len(batch_words_list) == 0:
        return
    # Generate the first batch synchronously so the user can start memorizing
    # right away; the remaining stories are generated in parallel afterwards.
    first_batch_words = batch_words_list[0]
    user_memory_batch = save_batch_words(db, 0, user_book.id, first_batch_words)
    user_book.memorizing_batch = user_memory_batch.id
    db.commit()
    save_batch_words_list(db, user_book.id, batch_words_list[1:])
    # asyncio.run(async_save_batch_words_list(db, user_book.id, batch_words_list[1:]))
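
# Hedged usage sketch for the whole tracking flow (SessionLocal is assumed,
# not part of this file):
#
#     db = SessionLocal()
#     user_book = db.query(schema.UserBook).get(user_book_id)
#     words = get_words_for_book(db, user_book)
#     track(db, user_book, words)  # saves the word book and pre-generates batches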

def remember(db: Session, batch_id: str, word_id: str):
    return create_user_memory_action(db, UserMemoryActionCreate(
        batch_id=batch_id,
        word_id=word_id,
        action="remember"
    ))

def forget(db: Session, batch_id: str, word_id: str):
    return create_user_memory_action(db, UserMemoryActionCreate(
        batch_id=batch_id,
        word_id=word_id,
        action="forget"
    ))

def save_memorizing_word_action(db: Session, batch_id: str, actions: List[Tuple[str, str]]):
    """Bulk-record remember/forget actions for the words in one batch.

    actions: [(word_id, "remember" | "forget")]
    """
    for word_id, action in actions:
        memory_action = UserMemoryActionCreate(
            batch_id=batch_id,
            word_id=word_id,
            action=action
        )
        db_memory_action = schema.UserMemoryAction(**memory_action.dict())
        db.add(db_memory_action)
    db.commit()
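
# Hedged usage sketch: record the outcome of one memorization round in bulk.
#
#     save_memorizing_word_action(db, batch.id, [
#         (word_a.vc_id, "remember"),
#         (word_b.vc_id, "forget"),
#     ])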

def on_batch_start(db: Session, user_memory_batch_id: str):
    return create_user_memory_batch_action(db, UserMemoryBatchActionCreate(
        batch_id=user_memory_batch_id,
        action="start"
    ))

def on_batch_end(db: Session, user_memory_batch_id: str):
    return create_user_memory_batch_action(db, UserMemoryBatchActionCreate(
        batch_id=user_memory_batch_id,
        action="end"
    ))
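
# Hedged usage sketch: bracket a memorization session so time-on-batch can be
# derived later from the start/end action timestamps:
#
#     on_batch_start(db, batch.id)
#     ...user works through the story and its words...
#     save_memorizing_word_action(db, batch.id, actions)
#     on_batch_end(db, batch.id)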

def generate_next_batch(db: Session, user_book: schema.UserBook,
                        minutes: int = 60, k: int = 3):
    """Generate the next batch: a review batch or a recall batch.

    Returns None when the caller should fall back to a new-word batch.
    """
    # Efficiency below left_bound counts as forgotten, between the bounds as
    # unfamiliar, and above right_bound as mastered.
    left_bound, right_bound = 0.3, 0.6
    user_book_id = user_book.id
    batch_size = user_book.batch_size
    memorizing_words = get_user_memory_word_history_in_minutes(db, user_book_id, minutes)
    if len(memorizing_words) < k * batch_size:
        # 1. Too few recently memorized words: fall back to a new-word batch.
        logger.info("new-word batch")
        return None
    # Compute the memorization efficiency of each word.
    memory_actions = get_actions_at_each_word(db, [w.vc_id for w in memorizing_words])
    remember_count = defaultdict(int)
    forget_count = defaultdict(int)
    for a in memory_actions:
        if a.action == "remember":
            remember_count[a.word_id] += 1
        else:
            forget_count[a.word_id] += 1
    word_id_to_efficiency = {}
    for word in memorizing_words:
        total = remember_count[word.vc_id] + forget_count[word.vc_id]
        # Guard against division by zero for words with no recorded actions yet;
        # treat them as fully forgotten so they are reviewed first.
        word_id_to_efficiency[word.vc_id] = remember_count[word.vc_id] / total if total else 0.0
    # list.sort() returns None, so build a sorted copy for logging.
    logger.info(sorted(((w.vc_vocabulary, word_id_to_efficiency[w.vc_id]) for w in memorizing_words), key=lambda x: x[1]))
    all_mastered = all(efficiency > right_bound for efficiency in word_id_to_efficiency.values())
    all_well_practiced = all(count > 3 for count in remember_count.values())
    if all_mastered and all_well_practiced:
        # 2. Memorization efficiency is uniformly high: move on to new words.
        logger.info("new-word batch")
        return None
    forgot_word_ids = [word_id for word_id, efficiency in word_id_to_efficiency.items() if efficiency < left_bound]
    forgot_word_ids.sort(key=lambda x: word_id_to_efficiency[x])
    if len(forgot_word_ids) >= batch_size:
        # 4. Normal case: enough forgotten words to fill a review batch.
        logger.info("review batch")
        # Pick the lowest-efficiency words first (the sort above orders them).
        id_to_word = {word.vc_id: word for word in memorizing_words}
        batch_words = [id_to_word[word_id] for word_id in forgot_word_ids[:batch_size]]
        batch_words.sort(key=lambda x: x.vc_difficulty, reverse=True)
        batch_words_str_list = [word.vc_vocabulary for word in batch_words]
        story, translated_story = generate_story_and_translated_story(batch_words_str_list)
        user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
            user_book_id=user_book_id,
            story=story,
            translated_story=translated_story,
            batch_type="复习",  # "review"
        ))
        create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
            batch_id=user_memory_batch.id,
            story=story,
            translated_story=translated_story
        ))
        for word in batch_words:
            memory_word = UserMemoryWordCreate(
                batch_id=user_memory_batch.id,
                word_id=word.vc_id
            )
            db_memory_word = schema.UserMemoryWord(**memory_word.dict())
            db.add(db_memory_word)
        db.commit()
        return user_memory_batch
    unfamiliar_word_ids = [word_id for word_id, efficiency in word_id_to_efficiency.items() if left_bound <= efficiency < right_bound]
    if len(unfamiliar_word_ids) < batch_size:
        # Also pull in words remembered fewer than 3 times, skipping any that
        # are already in the list so the count is not inflated by duplicates.
        unfamiliar_word_ids += [word_id for word_id, count in remember_count.items()
                                if count < 3 and word_id not in unfamiliar_word_ids]
    unfamiliar_word_ids.sort(key=lambda x: word_id_to_efficiency[x])
    if len(unfamiliar_word_ids) >= batch_size:
        # 3. Memorization efficiency is low: build a recall batch from the
        # least-remembered unfamiliar words.
        logger.info("recall batch")
        id_to_word = {word.vc_id: word for word in memorizing_words}
        batch_words = [id_to_word[word_id] for word_id in unfamiliar_word_ids[:batch_size]]
        batch_words.sort(key=lambda x: x.vc_difficulty, reverse=True)
        batch_words_str_list = [word.vc_vocabulary for word in batch_words]
        story, translated_story = generate_story_and_translated_story(batch_words_str_list)
        user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
            user_book_id=user_book_id,
            story=story,
            translated_story=translated_story,
            batch_type="回忆",  # "recall"
        ))
        create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
            batch_id=user_memory_batch.id,
            story=story,
            translated_story=translated_story
        ))
        for word in batch_words:
            memory_word = UserMemoryWordCreate(
                batch_id=user_memory_batch.id,
                word_id=word.vc_id
            )
            db_memory_word = schema.UserMemoryWord(**memory_word.dict())
            db.add(db_memory_word)
        db.commit()
        return user_memory_batch
    # 5. Normal case: nothing urgent to review or recall, use a new-word batch.
    logger.info("new-word batch")
    return None
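
# Hedged illustration (not called by anything above): a dependency-free sketch
# of how the efficiency thresholds in generate_next_batch partition words into
# batch types. _classify_efficiency is a hypothetical helper added for clarity.
def _classify_efficiency(remember: int, forget: int,
                         left_bound: float = 0.3, right_bound: float = 0.6) -> str:
    """Classify a word by remember / (remember + forget) into the bands above."""
    total = remember + forget
    if total == 0:
        return "unseen"
    efficiency = remember / total
    if efficiency < left_bound:
        return "forgotten"    # candidate for a review batch (case 4)
    if efficiency < right_bound:
        return "unfamiliar"   # candidate for a recall batch (case 3)
    return "mastered"         # counts toward switching to a new-word batch (case 2)

# Examples:
#     _classify_efficiency(3, 1) -> "mastered"   (0.75 >= 0.6)
#     _classify_efficiency(1, 2) -> "unfamiliar" (0.33 in [0.3, 0.6))
#     _classify_efficiency(0, 3) -> "forgotten"  (0.00 < 0.3)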