Spaces:
Running
Running
Commit
·
565028a
1
Parent(s):
26ed1ea
Create tools.py
Browse files
tools.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ast

import pandas as pd
|
2 |
+
|
3 |
+
# Destination-style square names: a file digit followed by a kanji rank
# numeral, ordered file by file ("1一" .. "1九", "2一" .. "9九").
# Index-aligned with KIFU_FROM_SQUARE_NAMES.
KIFU_TO_SQUARE_NAMES = [
    file_digit + rank_kanji
    for file_digit in "123456789"
    for rank_kanji in "一二三四五六七八九"
]
|
14 |
+
# Two-digit square names (file digit then rank digit), ordered file by file
# ("11" .. "19", "21" .. "99"). Index-aligned with KIFU_TO_SQUARE_NAMES.
KIFU_FROM_SQUARE_NAMES = [
    f"{file_no}{rank_no}"
    for file_no in range(1, 10)
    for rank_no in range(1, 10)
]
|
25 |
+
|
26 |
+
# Substrings removed from a player name, in this exact order: the
# "N-th lifetime Meijin" forms must be removed before the bare "名人".
_PRECEDENCE_NAME_TOKENS = (
    "十七世名人", "十八世名人", "十九世名人",
    "王将", "王座", "名人", "竜王", "棋聖", "叡王", "王位", "棋王",
    "・", "二冠", "三冠",
)


def _strip_rank_and_titles(name):
    """Return *name* without spaces, a trailing dan rank, or title words."""
    name = name.replace(" ", "").replace("\u3000", "")
    if name.endswith("段"):
        # Drops the final two characters, i.e. the "段" and the character
        # preceding it (e.g. "九段").
        name = name[:-2]
    for token in _PRECEDENCE_NAME_TOKENS:
        name = name.replace(token, "")
    return name


def nomalize_precedence_name(df):
    """Strip rank and title words from the ``precedence_name`` column.

    Removes ASCII and ideographic spaces, a trailing two-character dan rank,
    and the title words listed in ``_PRECEDENCE_NAME_TOKENS`` from every
    value of ``df["precedence_name"]``.

    The column is replaced in a single assignment instead of per-cell
    ``df[col].iloc[i] = ...`` chained assignment, which is not guaranteed to
    write back to ``df`` (it does not under pandas copy-on-write).

    Args:
        df: DataFrame with a string column ``"precedence_name"``.

    Returns:
        The same ``df`` object, modified in place.
    """
    df["precedence_name"] = df["precedence_name"].map(_strip_rank_and_titles)
    return df
|
36 |
+
|
37 |
+
def _clean_kif_move(move):
    """Strip the leading ASCII move number and anything from the first "(" on."""
    # lstrip is safe on empty / all-digit strings, unlike indexing move[0].
    move = move.lstrip("0123456789")
    move = move.partition("(")[0]
    return move.replace("\u3000", "")


def nomalize_kif(df):
    """Normalize the ``kif`` column into one concatenated move string per row.

    Each cell of ``df["kif"]`` is expected to hold the textual form of a
    Python list of strings. For every entry, the leading ASCII digits (move
    number) are removed, the text is cut at the first "(" (dropping what
    follows, e.g. origin square / elapsed time), ideographic spaces are
    removed, and the entries are joined without a separator.

    Args:
        df: DataFrame with a ``"kif"`` column of list literals stored as text.

    Returns:
        The same ``df`` object, with ``"kif"`` replaced by the joined strings.

    Raises:
        ValueError, SyntaxError: if a cell is not a valid Python literal.
    """
    normalized = []
    for raw in df["kif"]:
        # NOTE(review): the original used eval() on the cell contents.
        # ast.literal_eval accepts only literals, so data cannot run code.
        moves = ast.literal_eval(raw)
        normalized.append("".join(_clean_kif_move(move) for move in moves))
    # Single column assignment; per-cell chained assignment may not write back.
    df["kif"] = normalized
    return df
|
60 |
+
|
61 |
+
# A sentence containing any of these substrings is dropped from the comment.
_COMMENT_SKIP_WORDS = (
    "期", "出身", "優勝", "受賞", "回", "記録", "棋士番号", "勝", "敗", "名人",
    "時", "分", "成績", "棋戦", "段", "本日", "立会", "ABEMA", "第", "本局",
    "対局", "永世",
)


def _filter_comment_sentences(text):
    """Return *text* keeping only sentences without any skip word."""
    kept = [
        sentence
        for sentence in text.split("。")
        if sentence
        and not any(word in sentence for word in _COMMENT_SKIP_WORDS)
    ]
    # Every kept sentence gets its "。" terminator back.
    return "".join(sentence + "。" for sentence in kept)


def nomalize_comment(df):
    """Drop sentences containing skip words from the ``output`` column.

    Each value of ``df["output"]`` is split on "。"; empty pieces and pieces
    containing any entry of ``_COMMENT_SKIP_WORDS`` are discarded, and the
    remaining sentences are re-joined, each terminated with "。".

    The column is replaced in one assignment rather than per-cell chained
    assignment, which is not guaranteed to write back to ``df``.

    Args:
        df: DataFrame with a string column ``"output"``.

    Returns:
        The same ``df`` object, modified in place.
    """
    df["output"] = df["output"].map(_filter_comment_sentences)
    return df
|
82 |
+
|
83 |
+
def accuracy_bestlist(df):
    """Print and return how often the played move matches a listed candidate.

    For every row, ``bestlist``, ``best2list`` and ``kif`` are parsed from
    their textual list form. For each position ``x >= 1`` of the played
    moves, a hit is counted when ``bestlist[x-1][0]`` or ``best2list[x-1][0]``
    is contained in the played move ``kif[x]``. The per-row accuracy is
    ``hits / len(kif)`` (the denominator is the full move count, as in the
    original implementation).

    Args:
        df: DataFrame with ``"bestlist"``, ``"best2list"`` and ``"kif"``
            columns, each holding a Python list literal stored as text.

    Returns:
        The mean of the per-row accuracies. Rows with no moves count as
        0.0, and an empty frame yields 0.0, instead of raising
        ZeroDivisionError. The value is also printed, as before.

    Raises:
        ValueError, SyntaxError: if a cell is not a valid Python literal.
    """
    total = 0.0
    rows = zip(df["bestlist"], df["best2list"], df["kif"])
    for z, (best_raw, second_raw, kif_raw) in enumerate(rows):
        # NOTE(review): the original used eval(); literal_eval only accepts
        # literals, so cell contents cannot execute code.
        blist = ast.literal_eval(best_raw)
        b2list = ast.literal_eval(second_raw)
        te = ast.literal_eval(kif_raw)

        cnt = 0
        for x in range(1, len(te)):
            try:
                if blist[x - 1][0] in te[x] or b2list[x - 1][0] in te[x]:
                    cnt += 1
            except (IndexError, KeyError, TypeError):
                # Candidate list shorter than the game, or a malformed
                # entry: treat the position as a miss.
                continue

        if cnt == 0:
            print("accuracy = 0",z)
        accuracy = cnt / len(te) if te else 0.0
        print("z = ",z," accuracy = ",accuracy)
        total += accuracy

    num = len(df)
    mean_accuracy = total / num if num else 0.0
    print("mean_acuuracy",mean_accuracy)
    return mean_accuracy
|
106 |
+
|
107 |
+
def nomalize_sfen(s):
    """Extract a list of moves from record lines, rewriting the square name.

    The first two entries of ``s`` are skipped. For each following entry
    shorter than 30 characters, the second whitespace-separated token is
    taken as the move. If its first two characters are a name in
    ``KIFU_FROM_SQUARE_NAMES``, they are replaced by the entry at the same
    index in ``KIFU_TO_SQUARE_NAMES``. Promoted-piece names are then
    rewritten ("竜"→"龍", "成銀"→"全", "成桂"→"圭", "成香"→"杏").

    Processing stops after the first game-ending token ("投了", "千日手",
    "持将棋", "反則勝ち"), which is appended unchanged.

    Fixes over the original:
      * a token whose first two characters are not a known square no longer
        reuses the previous move's text (or raises UnboundLocalError on the
        first move); it is passed through with only the piece-name
        replacements applied;
      * one-character tokens and lines with fewer than two tokens no longer
        raise IndexError (the latter are skipped).

    Args:
        s: indexable sequence of text lines.
           NOTE(review): presumably the lines of a KIF-style record; confirm
           against the caller.

    Returns:
        List of move strings, ending with the game-ending token if present.
    """
    square_map = dict(zip(KIFU_FROM_SQUARE_NAMES, KIFU_TO_SQUARE_NAMES))
    terminal_tokens = ("投了", "千日手", "持将棋", "反則勝ち")
    piece_renames = (("竜", "龍"), ("成銀", "全"), ("成桂", "圭"), ("成香", "杏"))

    movelist = []
    for idx in range(2, len(s)):
        line = s[idx]
        if len(line) >= 30:
            continue
        tokens = line.split()
        if len(tokens) < 2:
            # No move token on this line.
            continue
        move = tokens[1]

        if move in terminal_tokens:
            movelist.append(move)
            break  # nothing after the game-ending token is collected

        square = square_map.get(move[:2])
        word = square + move[2:] if square is not None else move
        for old, new in piece_renames:
            word = word.replace(old, new)
        movelist.append(word)
    return movelist
|
128 |
+
|
129 |
+
def make_triplets(df, column):
    """Build (anchor, positive, negative) kif triplets grouped by *column*.

    For every row (the anchor), one other row with the same ``column`` value
    and a different ``kif`` is sampled as the positive, and one row with a
    different ``column`` value and a different ``kif`` is sampled as the
    negative.

    Fixes over the original:
      * the collected list is now returned (previously it was built and
        discarded, so the function returned ``None``);
      * anchors with no positive or no negative candidate are skipped
        instead of raising from ``sample`` on an empty selection.

    Args:
        df: DataFrame with a ``"kif"`` column and the grouping ``column``.
        column: name of the column whose value defines "same" / "different".

    Returns:
        List of tuples ``(anchor_kif, positive_kif, negative_kif,
        positive_label, negative_label)`` where the labels are the sampled
        rows' ``column`` values.
    """
    triplets = []
    for pos in range(len(df)):
        anchor = df.iloc[pos]
        same_label = df[column] == anchor[column]
        other_kif = df["kif"] != anchor["kif"]

        positive_pool = df.loc[same_label & other_kif]
        negative_pool = df.loc[~same_label & other_kif]
        if positive_pool.empty or negative_pool.empty:
            continue

        # Take the sampled rows directly rather than re-looking them up by
        # index label, which is ambiguous if the index has duplicates.
        positive = positive_pool.sample(n=1).iloc[0]
        negative = negative_pool.sample(n=1).iloc[0]

        triplets.append((
            anchor["kif"],
            positive["kif"],
            negative["kif"],
            positive[column],
            negative[column],
        ))
    return triplets
|
145 |
+
|
146 |
+
# Elements that mark the start of a move: an ASCII digit (as text or int)
# or "同". Kept as a tuple so membership uses ==, as in the original list.
_MOVE_START_MARKERS = (
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "同",
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
)


def add_symbol(df,column):
    """Prefix each move in *column* with an alternating "▲" / "△" marker.

    Every value of ``df[column]`` is iterated element by element. Whenever an
    element is an ASCII digit or "同", the current side marker is inserted
    before it and the marker flips; all other elements are copied as-is.

    Fixes over the original:
      * the marker restarts at "▲" for every row, so a row with an odd
        number of moves no longer makes the next row start with "△";
      * elements are converted with ``str`` before concatenation, so the int
        markers accepted by the membership test no longer raise TypeError;
      * the column is replaced in one assignment rather than per-cell
        chained assignment, which is not guaranteed to write back to ``df``.

    Args:
        df: DataFrame containing *column*.
        column: name of the column holding the move text (or a sequence of
            elements) to annotate.

    Returns:
        The same ``df`` object, with *column* replaced by annotated strings.
    """
    annotated = []
    for record in df[column]:
        teban = "▲"
        parts = []
        for element in record:
            if element in _MOVE_START_MARKERS:
                parts.append(teban)
                teban = "△" if teban == "▲" else "▲"
            parts.append(str(element))
        annotated.append("".join(parts))
    df[column] = annotated
    return df
|