baqu2213 committed on
Commit
f5cf8c0
·
1 Parent(s): 61fae1e

Upload 15 files

Browse files
.gitattributes CHANGED
@@ -106,3 +106,4 @@ Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_explicit.csv filter=lfs dif
106
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_nsfw.csv filter=lfs diff=lfs merge=lfs -text
107
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_safeimage.csv filter=lfs diff=lfs merge=lfs -text
108
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_sensitive.csv filter=lfs diff=lfs merge=lfs -text
 
 
106
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_nsfw.csv filter=lfs diff=lfs merge=lfs -text
107
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_safeimage.csv filter=lfs diff=lfs merge=lfs -text
108
  Danbooru[[:space:]]Prompt[[:space:]]Selector/csv2023_sensitive.csv filter=lfs diff=lfs merge=lfs -text
109
+ Danbooru[[:space:]]Prompt[[:space:]]Selector/TEST2024/NAIA_1231_console_testv1.exe filter=lfs diff=lfs merge=lfs -text
Danbooru Prompt Selector/TEST2024/NAIA_1231_console_testv1.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba5622ba32c6c8c7cf4123933bfa4ea9bb68c05b6d722feb95ae2f40dd089a5f
3
+ size 837859752
Danbooru Prompt Selector/TEST2024/NAIA_1231_testv1.py ADDED
The diff for this file is too large to render. See raw diff
 
Danbooru Prompt Selector/TEST2024/NAIA_Login.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from base64 import urlsafe_b64encode
2
+ from hashlib import blake2b
3
+ import argon2
4
+ import requests
5
+
6
+ BASE_URL="https://api.novelai.net"
7
+
8
def argon_hash(email: str, password: str, size: int, domain: str) -> str:
    """Return a URL-safe base64 Argon2id digest derived from the credentials.

    The salt is the 16-byte BLAKE2b digest of the first six characters of
    ``password`` followed by ``email`` and ``domain``. ``size`` is the length
    in bytes of the raw Argon2 output before base64 encoding.
    """
    salt_hasher = blake2b(digest_size=16)
    salt_hasher.update(f"{password[:6]}{email}{domain}".encode())

    digest = argon2.low_level.hash_secret_raw(
        secret=password.encode(),
        salt=salt_hasher.digest(),
        time_cost=2,
        memory_cost=int(2000000 / 1024),
        parallelism=1,
        hash_len=size,
        type=argon2.low_level.Type.ID,
    )
    return urlsafe_b64encode(digest).decode()
25
+
26
def login(key) -> str:
    """Exchange an access key for an access token via ``/user/login``.

    ``key`` is sent as the ``"key"`` field of the JSON request body and the
    ``"accessToken"`` field of the JSON response is returned.

    Errors are not handled here; they propagate to the caller: ``requests``
    exceptions for transport problems (including a timeout), and ``KeyError``
    when the response JSON has no ``"accessToken"`` entry.
    """
    response = requests.post(
        f"{BASE_URL}/user/login",
        json={"key": key},
        # Without a timeout requests waits indefinitely on an unresponsive server.
        timeout=30,
    )
    return response.json()["accessToken"]
30
+
31
def get_access_key(email: str, password: str) -> str:
    """Return the first 64 characters of the hash used as the login access key.

    The hash is ``argon_hash`` with a 64-byte output and the
    ``"novelai_data_access_key"`` domain string.
    """
    encoded = argon_hash(email, password, 64, "novelai_data_access_key")
    return encoded[:64]
Danbooru Prompt Selector/TEST2024/NAIA_generation.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import random
3
+ from PIL import Image, ImageOps, ImageTk
4
+ from datetime import datetime
5
+ import time
6
+ from pathlib import Path
7
+ import io
8
+ import zipfile
9
+
10
+ BASE_URL="https://api.novelai.net"
11
+
12
+ def make_turbo_prompt(gen_request):
# Build four shallow copies of gen_request (positive0..positive3), each with a
# rewritten 'prompt' string and 'type' set to "turbo", and return them as a tuple.
# The prompt is split on ', ' into keywords; the keyword list is then edited in
# four successive stages and snapshotted after each stage.
# NOTE(review): this rendering has lost the file's indentation, so the nesting
# described in the comments below is inferred from statement order - confirm
# against the raw file.
13
+ lines = gen_request['prompt']
14
+ result = {
15
+ "boys": False,
16
+ "girls": False,
17
+ "1girl": False,
18
+ "1boy": False,
19
+ "1other": False,
20
+ "others": False
21
+ }
22
+ state = {
23
+ "nude,": False,
24
+ "pov,": False,
25
+ "cum,": False,
26
+ "after ": False,
27
+ "pussy juice": False,
28
+ "barefoot": False,
29
+ "breasts": False,
30
+ "ejaculation": False,
31
+ }
32
+
# Helper: returns a copy of source_list in which, for every keyword of
# reference_list that is missing from source_list, a run of spaces of the same
# length is inserted at that keyword's index in reference_list.
33
+ def insert_spaces(source_list, reference_list):
34
+ modified_list = source_list.copy()
35
+ for index, keyword in enumerate(reference_list):
36
+ if keyword not in source_list:
37
+ space_count = len(keyword) # as many space characters as the keyword is long
38
+ modified_list.insert(index, ' ' * space_count)
39
+ return modified_list
40
+
41
+ keywords = gen_request['prompt'].split(', ')
42
+ filtered_keywords = []
43
+ removed_indices = []
44
+ positive0, positive1, positive2, positive3 = gen_request.copy(),gen_request.copy(),gen_request.copy(),gen_request.copy()
45
+
# Both dictionaries are filled by substring tests against the whole prompt text.
# Only `result` is read again below; `state` is populated but not used afterwards.
46
+ for word in result.keys():
47
+ if word in lines:
48
+ result[word] = True
49
+ for word in state.keys():
50
+ if word in gen_request['prompt']:
51
+ state[word] = True
52
+
53
+ key_index = int((len(keywords)/2)-1)
54
+
# The staged rewrite below runs only when the prompt mentions "1boy" or "boys",
# also mentions "1girl", and contains the substring 'sex,'.
55
+ if(result["1boy"]) or (result["boys"]):
56
+ if(result["1girl"]):
57
+ if('sex,' in gen_request['prompt']):
58
+ sex_pos_keywords = ['stomach bulge','insertion', 'fucked silly', 'x-ray', 'orgasm', 'cross-section', 'uterus', 'overflow', 'rape', 'vaginal', 'anal']
59
+ facial_keywords = ['tongue','ahegao']
60
+ temp_sex_pos = []
61
+ temp_facial = []
62
+ cum_events = []
63
+ explicit_check = []
64
+ if 'open mouth' in keywords: keywords.remove('open mouth')
65
+ if 'closed mouth' in keywords: keywords.remove('closed mouth')
66
+ if 'after rape' in keywords:
67
+ keywords.remove('after rape')
68
+ explicit_check.append('after rape')
# Sort every keyword into one bucket: kept as-is (filtered_keywords), held back
# for later stages (removed_indices, cum_events), or reduced to the matching
# entry of sex_pos_keywords / facial_keywords.
69
+ for keyword in keywords:
70
+ if ('sex' not in keyword and 'cum' not in keyword and 'ejaculation' not in keyword and 'vaginal' not in keyword and 'penetration' not in keyword) and all(sex_pos not in keyword for sex_pos in sex_pos_keywords) and all(facial not in keyword for facial in facial_keywords):
71
+ filtered_keywords.append(keyword)
72
+ elif 'sex' in keyword:
73
+ removed_indices.append(keyword)
74
+ elif 'penetration' in keyword:
75
+ removed_indices.append(keyword)
76
+ elif 'cum' in keyword and keyword != 'cum':
77
+ cum_events.append(keyword)
78
+ elif any(sex_pos in keyword for sex_pos in sex_pos_keywords):
79
+ for sex_pos in sex_pos_keywords:
80
+ if sex_pos in keyword:
81
+ temp_sex_pos.append(sex_pos)
# NOTE(review): the condition below uses `not in`, so it is true unless the
# keyword contains every facial keyword; the inner `if facial in keyword` is what
# actually selects. Presumably `in` was intended - confirm.
82
+ elif any(facial not in keyword for facial in facial_keywords):
83
+ for facial in facial_keywords:
84
+ if facial in keyword:
85
+ temp_facial.append(facial)
# Stage 0 snapshot: the kept keywords plus one marker entry inserted near the
# middle; the marker is removed again right after the copy is taken.
86
+ filtered_keywords.insert(int((len(filtered_keywords)/2)-1), ' no penetration, imminent penetration')
87
+ filtered_keywords_positive0 = filtered_keywords.copy()
88
+ filtered_keywords.remove(' no penetration, imminent penetration')
89
+ #0 imminent penetration, imminent sex
90
+ for i, keyword in enumerate(filtered_keywords):
91
+ if 'pantyhose' in keyword:
92
+ filtered_keywords[i] = 'torn ' + filtered_keywords[i]
93
+ #1 default
# key_index is the insertion point for the following stages: the middle of the
# list, overridden by the position of 'pussy' and then of 'penis' when present.
94
+ key_index = int((len(filtered_keywords)/2)-1)
95
+ if 'pussy' in filtered_keywords: key_index = filtered_keywords.index('pussy')
96
+ if 'penis' in filtered_keywords: key_index = filtered_keywords.index('penis')
97
+ filtered_keywords[key_index:key_index] = ['motion lines', 'surprised']
98
+ for keyword in removed_indices:
99
+ if 'cum' not in keyword and 'ejaculation' not in keyword:
100
+ filtered_keywords.insert(key_index,keyword)
101
+ if(temp_sex_pos): filtered_keywords[key_index:key_index] = temp_sex_pos
102
+ if('clothed sex' in filtered_keywords and not 'bottomless' in filtered_keywords): filtered_keywords.insert(filtered_keywords.index('clothed sex')+1, 'bottomless')
# Stage 1 works on a copy; the 'closed eyes' entry is randomly varied.
# NOTE(review): the loop removes from pos1_copied_keywords while enumerating it,
# and index i of the copy is reused on filtered_keywords - verify this is intended.
103
+ pos1_copied_keywords = filtered_keywords.copy()
104
+ for i, keyword in enumerate(pos1_copied_keywords):
105
+ if 'closed eyes' in keyword:
106
+ rand_num = random.randint(0,2)
107
+ if(rand_num == 0): pos1_copied_keywords[i] = 'half-' + pos1_copied_keywords[i]
108
+ elif(rand_num == 1 and 'closed eyes' in pos1_copied_keywords):
109
+ pos1_copied_keywords.remove('closed eyes')
110
+ filtered_keywords[i] = 'half-closed eyes'
111
+ filtered_keywords_positive1 = pos1_copied_keywords.copy()
112
+ #2 ejaculation,cum in pussy
# Stage 2: 'surprised' is replaced in place by "ejaculation" and "cum".
113
+ key_index = filtered_keywords.index('surprised')
114
+ filtered_keywords.remove('surprised')
115
+ filtered_keywords[key_index:key_index] = ["ejaculation","cum"]
116
+ for keyword in removed_indices:
117
+ if 'cum' in keyword:
118
+ filtered_keywords.insert(key_index,keyword)
119
+ if(temp_facial): filtered_keywords[key_index:key_index] =temp_facial
120
+ filtered_keywords_positive2 = filtered_keywords.copy()
121
+ #3 after sex, after ejaculation
122
+ for i, keyword in enumerate(filtered_keywords):
123
+ if 'closed eyes' in keyword:
124
+ rand_num = random.randint(0,2)
125
+ if(rand_num == 0 and filtered_keywords[i] != 'half-closed eyes'): filtered_keywords[i] = 'half-' + filtered_keywords[i]
126
+ elif(rand_num == 1): filtered_keywords[i] = 'empty eyes'
127
+ else: filtered_keywords[i] = 'empty eyes, half-closed eyes'
128
+ if 'sex' in filtered_keywords:
129
+ key_index = filtered_keywords.index('sex')
130
+ elif 'group sex' in filtered_keywords:
131
+ key_index = filtered_keywords.index('group sex')
132
+ filtered_keywords.remove('ejaculation')
133
+ filtered_keywords[key_index:key_index] = ['cum drip', 'erection'] + cum_events
134
+ if(explicit_check): filtered_keywords[key_index:key_index] = explicit_check
135
+ if 'sex' in filtered_keywords and 'group sex' not in filtered_keywords:
136
+ if('pussy' in filtered_keywords and not 'anal' in filtered_keywords): filtered_keywords.insert(filtered_keywords.index('sex')+1, 'after vaginal, spread pussy')
137
+ elif('anal' in filtered_keywords): filtered_keywords.insert(filtered_keywords.index('sex')+1, 'after anus, cum in ass')
138
+ filtered_keywords.insert(filtered_keywords.index('sex'), 'after sex')
139
+ filtered_keywords.remove('sex')
140
+ elif 'group sex' in filtered_keywords:
141
+ if('vaginal' in filtered_keywords and not 'anal' in filtered_keywords):
142
+ filtered_keywords.insert(filtered_keywords.index('group sex')+1, 'after vaginal, spread pussy')
143
+ if 'multiple penises' in filtered_keywords: filtered_keywords.insert(filtered_keywords.index('group sex')+3, 'cum on body, bukkake')
144
+ elif('anal' in filtered_keywords):
145
+ filtered_keywords.insert(filtered_keywords.index('group sex')+1, 'after anus, cum in ass')
146
+ if 'multiple penises' in filtered_keywords: filtered_keywords.insert(filtered_keywords.index('group sex')+3, 'cum on body, bukkake')
147
+ else: filtered_keywords.insert(filtered_keywords.index('group sex')+1, 'cum on body, {bukkake}')
# Final clean-up of stage 3: drop every sex_pos_keywords entry except 'orgasm'
# and 'overflow' that is still present as an exact list element.
148
+ temp_post_keyword = []
149
+ for keyword in sex_pos_keywords:
150
+ if not (keyword == 'orgasm' or keyword == 'overflow'):
151
+ if keyword in filtered_keywords:
152
+ temp_post_keyword.append(keyword)
153
+ for keyword in temp_post_keyword:
154
+ filtered_keywords.remove(keyword)
155
+
# Snapshots 0-2 are padded with insert_spaces against the final stage-3 list
# before being joined with ', '.
# NOTE(review): filtered_keywords_positive0/1/2 are only assigned inside the
# branch above. If these assignments sit at function level in the raw file, a
# prompt that does not take that branch would raise NameError here - confirm.
156
+ positive0['prompt'] = ', '.join(insert_spaces(filtered_keywords_positive0, filtered_keywords)).strip()
157
+ positive1['prompt'] = ', '.join(insert_spaces(filtered_keywords_positive1, filtered_keywords)).strip()
158
+ positive2['prompt'] = ', '.join(insert_spaces(filtered_keywords_positive2, filtered_keywords)).strip()
159
+ positive3['prompt'] = ', '.join(filtered_keywords).strip()
160
+ positive0["type"] = "turbo"
161
+ positive1["type"] = "turbo"
162
+ positive2["type"] = "turbo"
163
+ positive3["type"] = "turbo"
164
+ return positive0, positive1, positive2, positive3
165
+
166
def generate_image(access_token, prompt, model, action, parameters):
    """POST a generation request to ``/ai/generate-image`` and return the raw body.

    The JSON payload carries ``prompt`` under ``"input"`` together with
    ``model``, ``action`` and ``parameters``; ``access_token`` is sent as a
    Bearer token. The HTTP status is not checked, so an error response is
    returned as bytes just like a successful one.
    """
    endpoint = f"{BASE_URL}/ai/generate-image"
    auth_header = {"Authorization": f"Bearer {access_token}"}
    payload = dict(input=prompt, model=model, action=action, parameters=parameters)

    reply = requests.post(endpoint, json=payload, headers=auth_header)
    return reply.content
177
+
178
def generate(gen_request):
    """Request one image, save it to disk and return a preview of it.

    ``gen_request`` is read as a mapping. Keys used: ``prompt``, ``negative``,
    ``width``, ``height``, ``seed``, ``sampler``, ``scale``, ``cfg_rescale``,
    ``sema``, ``sema_dyn``, ``quality_toggle``, ``png_rule``, ``save_folder``,
    ``access_token``, ``type``, ``user_screen_size`` and, when ``png_rule`` is
    ``"count"``, ``start_time`` and ``count``.

    Returns a 5-tuple:

    * success: ``(preview_image, prompt, seed, image_info, saved_path_str)``
    * failure: ``(None, error_text, seed, None, None)``; the error text is also
      appended to ``error_log.txt`` in the current working directory.

    A missing key among those read before the request is sent raises
    ``KeyError`` to the caller; anything failing afterwards is reported through
    the failure tuple.
    """
    params = {
        "legacy": False,
        "quality_toggle": True if gen_request["quality_toggle"] == 1 else False,
        "width": gen_request["width"],
        "height": gen_request["height"],
        "n_samples": 1,
        "seed": gen_request["seed"],
        "extra_noise_seed": random.randint(0, 9999999999),
        "sampler": gen_request["sampler"],
        "steps": 28,
        "scale": gen_request["scale"],
        "uncond_scale": 1.0,
        "negative_prompt": gen_request["negative"],
        "sm": gen_request["sema"],
        "sm_dyn": gen_request["sema_dyn"],
        "decrisper": False,
        "controlnet_strength": 1.0,
        "add_original_image": False,
        "cfg_rescale": gen_request["cfg_rescale"],
        "noise_schedule": "native",
    }

    # TODO (original author's note): a wildcard feature still has to be built.
    positive = gen_request["prompt"]

    filename_rule = gen_request["png_rule"]
    save_folder = gen_request["save_folder"]

    access_token = gen_request["access_token"]
    additional_folder = ""

    def resize_and_fill(image, max_size=None):
        """Fit ``image`` into a black ``max_size`` square when it is too large.

        ``max_size`` defaults to ``gen_request["user_screen_size"]``. Images
        that already fit are returned unchanged.
        """
        if max_size is None:
            max_size = gen_request["user_screen_size"]
        original_width, original_height = image.size
        if original_width > max_size or original_height > max_size:
            # thumbnail() shrinks in place and keeps the aspect ratio.
            image.thumbnail((max_size, max_size))

            # Centre the shrunken image on a square canvas.
            width, height = image.size
            new_image = Image.new("RGB", (max_size, max_size), "black")
            new_image.paste(image, ((max_size - width) // 2, (max_size - height) // 2))
            return new_image
        else:
            return image

    def log_error(e, output_file_path="output_file_path"):
        """Append a timestamped entry for ``e`` to ``error_log.txt``.

        ``output_file_path`` is accepted for compatibility but not used; the
        log is always written relative to the current working directory.
        """
        current_time = datetime.now().strftime("%m/%d %H:%M:%S")
        error_message = f"#### Error occured at {current_time} ####\nError: {e}\n############################################\n"
        with open("error_log.txt", "a", encoding="utf-8") as file:
            file.write(error_message)

    def describe_failure(raw, exc):
        """Return the server's error text when there is one, else ``str(exc)``."""
        if raw:
            try:
                # An error response is a short text body; the slice drops its
                # first and last two characters.
                return raw.decode('utf-8')[2:-2]
            except UnicodeDecodeError:
                # The body was binary (a real zip), so the failure happened
                # locally; fall through to the exception text.
                pass
        return str(exc)

    # Bound before the try so the handler can inspect it even when the request
    # itself raised and nothing was downloaded.
    zipped_bytes = b""
    try:
        zipped_bytes = generate_image(access_token, positive, "nai-diffusion-3", "generate", params)
        if filename_rule == "count":
            additional_folder = "/" + gen_request["start_time"]
        # NOTE(review): treated as independent of the "count" rule - confirm
        # against the raw file, whose indentation is not visible here.
        if gen_request["type"] == "turbo":
            additional_folder += "/turbo"
        d = Path(save_folder + additional_folder)
        d.mkdir(parents=True, exist_ok=True)
        zipped = zipfile.ZipFile(io.BytesIO(zipped_bytes))
        # The first archive member is taken as the image.
        image_bytes = zipped.read(zipped.infolist()[0])
        if filename_rule == "count":
            _count = gen_request["count"]
            filename = (d / f"{_count:05}.png")
        else:
            filename = (d / f"{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
        filename.write_bytes(image_bytes)
        i = Image.open(io.BytesIO(image_bytes))
        i = ImageOps.exif_transpose(i).convert("RGB")
        i_resized = resize_and_fill(i)
        return i_resized, positive, params['seed'], i.info, str(filename)
    except Exception as e:
        message = describe_failure(zipped_bytes, e)
        log_error(message, "path_to_output_folder")
        return None, message, params['seed'], None, None
Danbooru Prompt Selector/TEST2024/NAIA_random_function_core.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas
2
+ import numpy as np
3
+
4
def find_keyword_index(general):
    """Return the insertion index just after the first person-count tag.

    Only the first six entries of ``general`` are inspected. A girls tag
    ("1girl" ... "6+girls") takes priority over a boys tag ("1boy" ...
    "6+boys"); the result is that tag's position plus one. When neither kind
    is present, 2 is returned.
    """
    boy_tags = ("1boy", "2boys", "3boys", "4boys", "5boys", "6+boys")
    girl_tags = ("1girl", "2girls", "3girls", "4girls", "5girls", "6+girls")
    head = general[:6]

    # Girls are checked first, then boys; the first hit wins.
    for tag_group in (girl_tags, boy_tags):
        first_hit = next(
            (position for position, tag in enumerate(head) if tag in tag_group),
            None,
        )
        if first_hit is not None:
            return first_hit + 1

    # No person-count tag near the front of the list.
    return 2
22
+
23
+ def RFP(popped_row, fix_prompt, after_prompt, auto_hide_prompt, rm_a, rm_s, rm_c, nsfw, data, magic_word):
# Build one comma-joined prompt string from popped_row['general'], the
# user-supplied fix/after prompts and, depending on the rm_* flags, the row's
# 'copyright', 'character' and 'artist' fields.
# popped_row is indexed by column name; data is read through its qe_word and
# bag_of_tags attributes; magic_word is read through the keys "random_artist"
# and "random_artist_name".
# NOTE(review): indentation is lost in this rendering, so which `if` blocks are
# nested inside which is inferred from statement order - confirm against the raw file.
24
+ boys = ["1boy", "2boys", "3boys", "4boys", "5boys", "6+boys"]
25
+ girls = ["1girl", "2girls", "3girls", "4girls", "5girls", "6+girls"]
26
+ general = [item.strip() for item in popped_row['general'].split(',')]
# nsfw == 1: keep only keywords found in data.qe_word or data.bag_of_tags,
# keywords containing "horns" or "(", and the person-count tags.
27
+ if nsfw == 1:
28
+ nsfw_word = []
29
+ for keyword in general:
30
+ if keyword in data.qe_word or keyword in data.bag_of_tags or "horns" in keyword or "(" in keyword or keyword in boys or keyword in girls:
31
+ nsfw_word.append(keyword)
32
+ general = nsfw_word
# rm_c == 1: remove every keyword that appears in data.bag_of_tags. Matches are
# collected first so the list is not modified while it is being iterated.
33
+ if rm_c == 1:
34
+ temp_general = []
35
+ for keyword in general:
36
+ if keyword in data.bag_of_tags:
37
+ temp_general.append(keyword)
38
+ for keyword in temp_general:
39
+ general.remove(keyword)
# The three prompt strings lose their last character before being split on ','
# - presumably a trailing comma; verify against the caller.
40
+ fix = [item.strip() for item in fix_prompt[:-1].split(',')]
# rm_s == 0: the row's copyright tags are appended to the fix keywords.
41
+ if rm_s == 0:
42
+ series = [item.strip() for item in popped_row['copyright'].split(',')]
43
+ fix = fix + series
44
+ after = [item.strip() for item in after_prompt[:-1].split(',')]
45
+ auto_hide = [item.strip() for item in auto_hide_prompt[:-1].split(',')]
# fix keywords are spliced in at the index returned by find_keyword_index;
# after keywords are appended at the end.
46
+ fix_index = find_keyword_index(general)
47
+ processed = general.copy()
48
+ temp_hide_prompt = []
49
+ processed[fix_index:fix_index] = fix
50
+ processed += after
# Remove every keyword listed in auto_hide (collected first, then removed).
51
+ for keyword in processed:
52
+ if keyword in auto_hide:
53
+ temp_hide_prompt.append(keyword)
54
+ for keyword in temp_hide_prompt:
55
+ processed.remove(keyword)
56
+
# rm_c == 0: character tags are inserted at fix_index, which is then advanced
# past them so the artist tags below land after the character tags.
57
+ if rm_c == 0:
58
+ if popped_row['character']:
59
+ character = [item.strip() for item in popped_row['character'].split(',')]
60
+ processed[fix_index:fix_index] = character
61
+ fix_index+=len(character)
# rm_a == 0: artist tags are inserted with an "artist:" prefix.
62
+ if rm_a == 0:
63
+ if popped_row['artist']:
64
+ artists = [item.strip() for item in popped_row['artist'].split(',')]
65
+ artist = ["artist:" + _artist for _artist in artists]
66
+ processed[fix_index:fix_index] = artist
67
+ if magic_word["random_artist"] == True:
68
+ processed.insert(fix_index, magic_word["random_artist_name"])
69
+
70
+ return ', '.join(processed)
Danbooru Prompt Selector/TEST2024/NAIA_search.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import customtkinter
2
+ import pandas as pd
3
+
4
+ # Keep only the rows that contain every keyword
5
def filter_rows_containing_all_keywords(df, keywords):
    """Return the rows of ``df`` in which every keyword is found.

    A keyword counts as found in a row when it matches in at least one of the
    'copyright', 'character', 'artist', 'meta' or 'general' columns. Only
    columns whose dtype is ``object`` are searched. Matching uses
    ``Series.str.contains`` with its defaults, so each keyword is interpreted
    as a regular expression; missing values never match.
    """
    searched_columns = ('copyright', 'character', 'artist', 'meta', 'general')

    # Start from "keep everything" and narrow it down one keyword at a time.
    keep = pd.Series([True] * len(df), index=df.index)
    for term in keywords:
        found = pd.Series([False] * len(df), index=df.index)
        for name in searched_columns:
            column = df[name]
            if column.dtype != 'object':
                continue
            found = found | column.str.contains(term, na=False)
        keep = keep & found

    return df[keep]
20
+
21
def filter_rows_not_containing_all_keywords(df, keywords):
    """Return the rows of ``df`` in which none of the keywords is found.

    A keyword counts as found in a row when it matches in at least one of the
    'copyright', 'character', 'artist', 'meta' or 'general' columns. Only
    columns whose dtype is ``object`` are searched. Matching uses
    ``Series.str.contains`` with its defaults, so each keyword is interpreted
    as a regular expression; missing values never match.
    """
    searched_columns = ('copyright', 'character', 'artist', 'meta', 'general')

    keep = pd.Series([True] * len(df), index=df.index)
    for term in keywords:
        found = pd.Series([False] * len(df), index=df.index)
        for name in searched_columns:
            column = df[name]
            if column.dtype != 'object':
                continue
            found = found | column.str.contains(term, na=False)
        # A row survives only if this keyword was found in none of its columns.
        keep = keep & ~found

    return df[keep]
37
+
38
def process_asterisk_group(df, asterisk_group):
    """Keep the rows whose joined tag text contains every ``*``-marked keyword.

    Each keyword has its leading ``*`` characters stripped and a ``,``
    appended. It is then searched (as a regular expression, the
    ``Series.str.contains`` default) in a per-row string made of a leading
    space, the 'copyright', 'character', 'artist', 'meta' and 'general' values
    joined with ``', '``, and a trailing comma.

    The per-row string is kept in a separate Series, so ``df`` itself is never
    modified and no temporary column can leak into the caller's frame.
    """
    # Strip the '*' marker and add ',' so the match must end at a tag boundary.
    asterisk_keywords = [keyword.lstrip('*') + ',' for keyword in asterisk_group]
    if not asterisk_keywords or len(df) == 0:
        return df

    search_string = df[['copyright', 'character', 'artist', 'meta', 'general']].apply(
        lambda row: ' ' + ', '.join(row.astype(str)) + ',', axis=1
    )

    keep = pd.Series([True] * len(df), index=df.index)
    for keyword in asterisk_keywords:
        keep &= search_string.str.contains(keyword, na=False)

    return df[keep]
49
+
50
def process_perfect_negative_group(df, perfect_negative_group):
    """Drop the rows whose joined tag text contains any ``~``-marked keyword.

    Each keyword has its leading ``~`` characters stripped and a ``,``
    appended. It is then searched (as a regular expression, the
    ``Series.str.contains`` default) in a per-row string made of a leading
    space, the 'copyright', 'character', 'artist', 'meta' and 'general' values
    joined with ``', '``, and a trailing comma.

    The per-row string is kept in a separate Series, so ``df`` itself is never
    modified and no temporary column can leak into the caller's frame.
    """
    # Strip the '~' marker and add ',' so the match must end at a tag boundary.
    perfect_negative_keywords = [keyword.lstrip('~') + ',' for keyword in perfect_negative_group]
    if not perfect_negative_keywords or len(df) == 0:
        return df

    search_string = df[['copyright', 'character', 'artist', 'meta', 'general']].apply(
        lambda row: ' ' + ', '.join(row.astype(str)) + ',', axis=1
    )

    # A row is kept only while none of the keywords has matched it.
    keep = pd.Series([True] * len(df), index=df.index)
    for keyword in perfect_negative_keywords:
        keep &= ~search_string.str.contains(keyword, na=False)

    return df[keep]
70
+
71
def search(df, search_request, exclude_request, E, N, S, G):
    """Filter ``df`` by rating flags, search terms and exclusion terms.

    ``E``, ``N``, ``S`` and ``G`` switch ratings off: a value of 0 removes the
    rows whose 'rating' is 'e', 'q', 's' or 'g' respectively.

    ``search_request`` is a comma-separated list of terms:

    * plain terms must all be found (``filter_rows_containing_all_keywords``);
    * ``{a|b}`` groups keep the rows matching any alternative; a leading ``*``
      on an alternative is dropped;
    * ``*term`` entries are handled by ``process_asterisk_group``.

    ``exclude_request`` is a comma-separated list of terms to reject: plain
    terms through ``filter_rows_not_containing_all_keywords`` and ``~term``
    entries through ``process_perfect_negative_group``.

    Empty terms (for example from a trailing comma) are discarded before the
    terms are grouped. Otherwise an empty exclusion term would match every row
    and wipe out the whole result.

    Returns the filtered DataFrame, or ``None`` as soon as no row is left.
    """
    # Rating switches, applied in the order e, q, s, g.
    for flag, rating in ((E, 'e'), (N, 'q'), (S, 's'), (G, 'g')):
        if flag == 0:
            df = df[~(df['rating'] == rating)]
    if len(df) == 0:
        return None

    # Positive terms; processing order is normal -> curly -> asterisk.
    split_requests = [item.strip() for item in search_request.split(',')]
    split_requests = [item for item in split_requests if item]

    curly_brace_group = [item for item in split_requests if item.startswith('{') and item.endswith('}')]
    asterisk_group = [item for item in split_requests if item.startswith('*')]
    normal_group = [item for item in split_requests if item not in curly_brace_group + asterisk_group]

    # Negative terms.
    negative_split_requests = [item.strip() for item in exclude_request.split(',')]
    negative_split_requests = [item for item in negative_split_requests if item]
    perfect_negative_group = [item for item in negative_split_requests if item.startswith('~')]
    negative_group = [item for item in negative_split_requests if item not in perfect_negative_group]

    # Plain terms: every one of them must match.
    if normal_group:
        df = filter_rows_containing_all_keywords(df, normal_group)
        if len(df) == 0:
            return None

    # OR groups: each {a|b} group narrows the result of the previous one.
    for group in curly_brace_group:
        alternatives = [item.strip() for item in group[1:-1].split('|')]
        results = pd.DataFrame()
        for alternative in alternatives:
            if alternative.startswith('*'):
                alternative = alternative[1:]
            # Only the first column that yields any match contributes rows
            # for this alternative.
            for column in ['copyright', 'character', 'artist', 'meta', 'general']:
                matched_rows = df[df[column].str.contains(alternative, na=False)]
                if not matched_rows.empty:
                    results = pd.concat([results, matched_rows])
                    break
        df = results.copy()
        # Checked per group: an empty result has no columns, so the next
        # group could not index into it.
        if len(df) == 0:
            return None

    # "Perfect matching" terms.
    if asterisk_group:
        df = process_asterisk_group(df, asterisk_group)
        if len(df) == 0:
            return None

    # Exclusions.
    if negative_group:
        df = filter_rows_not_containing_all_keywords(df, negative_group)
        if len(df) == 0:
            return None

    if perfect_negative_group:
        df = process_perfect_negative_group(df, perfect_negative_group)
        if len(df) == 0:
            return None

    return df
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
Danbooru Prompt Selector/TEST2024/artist_dictionary.py ADDED
The diff for this file is too large to render. See raw diff
 
Danbooru Prompt Selector/TEST2024/copyright_dict.py ADDED
The diff for this file is too large to render. See raw diff