# shogiapp / tools.py
# pizzagatakasugi's picture
# Update tools.py
# 1ce2752
import pandas as pd
# Square names in kifu notation: a file digit followed by a kanji rank.
# The list runs file by file (1..9) and, inside each file, rank by rank
# (一..九), giving 81 entries.
KIFU_TO_SQUARE_NAMES = [
    file_digit + rank_kanji
    for file_digit in "123456789"
    for rank_kanji in "一二三四五六七八九"
]
# Square names written as two digits (file digit, then rank digit).
# The list runs file by file (1..9) and, inside each file, rank by rank
# (1..9), giving 81 entries.
KIFU_FROM_SQUARE_NAMES = [
    f"{file_no}{rank_no}"
    for file_no in range(1, 10)
    for rank_no in range(1, 10)
]
def nomalize_precedence_name(df):
    """Remove ranks and title names from the first player's name.

    Every string in ``df["precedence_name"]`` is cleaned in this order:

    1. ASCII spaces and full-width spaces (U+3000) are removed.
    2. If the result ends with "段", its last two characters are dropped
       (a dan rank such as "九段").
    3. The lifetime-Meijin titles, the title names, the "・" separator and
       the "二冠"/"三冠" markers are removed wherever they occur.

    Missing values are passed through unchanged.

    Args:
        df: DataFrame containing a ``precedence_name`` column.

    Returns:
        The same ``df`` object; its ``precedence_name`` column is replaced
        by the cleaned names.
    """
    # Removed sequentially in this order: the "...世名人" forms must be
    # handled before the plain "名人", otherwise "十七世" would be left over.
    removable = (
        "十七世名人", "十八世名人", "十九世名人",
        "王将", "王座", "名人", "竜王", "棋聖", "叡王", "王位", "棋王",
        "・", "二冠", "三冠",
    )

    def _clean(name):
        name = name.replace(" ", "").replace("\u3000", "")
        if name.endswith("段"):
            name = name[:-2]
        for token in removable:
            name = name.replace(token, "")
        return name

    # Replace the whole column in one assignment. Writing through
    # df[col].iloc[i] is chained assignment, which does not update df
    # when pandas Copy-on-Write is active.
    df["precedence_name"] = df["precedence_name"].map(_clean, na_action="ignore")
    return df
def nomalize_kif(df):
    """Collapse each game record in ``df["kif"]`` into one cleaned string.

    Each cell is expected to hold the textual form of a list of move
    strings. For every move string:

    * leading ASCII digits (the move number) are stripped,
    * everything from the first "(" onwards is cut off (origin square,
      elapsed time, ...),
    * full-width spaces (U+3000) are removed.

    The cleaned moves of one row are concatenated without a separator and
    written back to the row's ``kif`` cell. Other characters, including
    ASCII spaces, are kept as they are.

    Args:
        df: DataFrame containing a ``kif`` column of stringified lists.

    Returns:
        The same ``df`` object with the ``kif`` column replaced.
    """
    def _clean_move(move):
        # lstrip copes with empty and all-digit entries, which used to
        # raise IndexError when the first character was inspected.
        move = move.lstrip("0123456789")
        move = move.split("(", 1)[0]
        return move.replace("\u3000", "")

    cleaned = []
    for raw in df["kif"]:
        # NOTE(review): eval() executes whatever expression the cell
        # contains. Only use this on trusted data; consider
        # ast.literal_eval if the cells are always plain list literals.
        moves = eval(raw)
        cleaned.append("".join(_clean_move(move) for move in moves))

    # Assign the column as a whole; df["kif"].iloc[i] = ... is chained
    # assignment and does not update df under pandas Copy-on-Write.
    df["kif"] = cleaned
    return df
def nomalize_comment(df):
    """Drop sentences containing any skip keyword from ``df["output"]``.

    Each text is split on "。". Empty fragments and fragments containing
    any of the keywords below are discarded. The remaining fragments are
    joined again, each followed by "。" (so a kept fragment that had no
    trailing "。" gains one).

    Missing values are passed through unchanged.

    Args:
        df: DataFrame containing an ``output`` column of comment texts.

    Returns:
        The same ``df`` object with the ``output`` column replaced.
    """
    # A sentence containing any of these substrings is omitted.
    skip_words = (
        "期", "出身", "優勝", "受賞", "回", "記録", "棋士番号", "勝", "敗", "名人",
        "時", "分", "成績", "棋戦", "段", "本日", "立会", "ABEMA", "第", "本局",
        "対局", "永世",
    )

    def _filter(text):
        kept = [
            sentence
            for sentence in text.split("。")
            if sentence and not any(word in sentence for word in skip_words)
        ]
        return "".join(sentence + "。" for sentence in kept)

    # Assign the column as a whole; df["output"].iloc[i] = ... is chained
    # assignment and does not update df under pandas Copy-on-Write.
    df["output"] = df["output"].map(_filter, na_action="ignore")
    return df
def accuracy_bestlist(df):
    """Print and return how often the played move matches a suggested move.

    For every row, the ``bestlist``, ``best2list`` and ``kif`` cells are
    evaluated into Python objects. For each index ``i >= 1`` of the move
    list, the move counts as a hit when ``bestlist[i-1][0]`` or
    ``best2list[i-1][0]`` is contained in ``kif[i]``. The row accuracy is
    ``hits / len(kif)``, so the first ``kif`` entry is never compared but
    is still counted in the denominator. An empty move list yields 0.0.

    Per-row accuracies and the mean over all rows are printed.

    Args:
        df: DataFrame with stringified ``bestlist``, ``best2list`` and
            ``kif`` columns.

    Returns:
        The mean row accuracy as a float, or 0.0 when ``df`` has no rows.
    """
    total_accuracy = 0.0
    games = 0
    for row in range(len(df)):
        # NOTE(review): eval() executes whatever expression the cell
        # contains. Only use this on trusted data.
        best = eval(df["bestlist"].iloc[row])
        second = eval(df["best2list"].iloc[row])
        moves = eval(df["kif"].iloc[row])

        hits = 0
        for ply in range(1, len(moves)):
            try:
                played = moves[ply]
                if best[ply - 1][0] in played or second[ply - 1][0] in played:
                    hits += 1
            except (LookupError, TypeError):
                # Best effort: a missing or malformed suggestion for this
                # ply simply does not count as a hit.
                continue

        if hits == 0:
            print("accuracy = 0", row)
        # Guard against an empty move list (previously ZeroDivisionError).
        accuracy = hits / len(moves) if len(moves) else 0.0
        print("z = ", row, " accuracy = ", accuracy)
        total_accuracy += accuracy
        games += 1

    # Guard against an empty DataFrame (previously ZeroDivisionError).
    mean_accuracy = total_accuracy / games if games else 0.0
    print("mean_acuuracy", mean_accuracy)
    return mean_accuracy
def nomalize_sfen(s):
    """Extract the move list from a sequence of record lines.

    The first two entries of ``s`` are skipped. Of the remaining entries,
    only those shorter than 30 characters are considered. The second
    whitespace-separated token of such a line is taken as the move.

    * If the move is one of the game-ending words ("投了", "千日手",
      "持将棋", "反則勝ち"), it is appended as is and processing stops.
    * Otherwise, when its first two characters are a square listed in
      ``KIFU_FROM_SQUARE_NAMES``, they are replaced by the entry at the
      same position of ``KIFU_TO_SQUARE_NAMES``. Piece names are then
      normalised ("竜"→"龍", "成銀"→"全", "成桂"→"圭", "成香"→"杏").

    A move whose prefix is not a known square is kept with only the
    piece-name normalisation applied. Lines with fewer than two tokens
    are skipped.

    Args:
        s: Sequence of strings (presumably the lines of one game record —
            verify against the caller).

    Returns:
        List of normalised move strings, ending with the game-ending word
        when one was found.
    """
    terminal_words = ("投了", "千日手", "持将棋", "反則勝ち")
    # Two-digit square -> kifu-notation square; the two tables are parallel.
    square_map = dict(zip(KIFU_FROM_SQUARE_NAMES, KIFU_TO_SQUARE_NAMES))

    movelist = []
    for line in s[2:]:
        if len(line) >= 30:
            continue
        tokens = line.split()
        if len(tokens) < 2:
            # No move token on this line (previously an IndexError).
            continue
        move = tokens[1]
        if move in terminal_words:
            movelist.append(move)
            # Nothing after the game-ending word is part of the record.
            break
        # Convert the numeric destination square to kifu notation. The
        # slice is safe for one-character tokens, and an unknown prefix
        # falls back to the raw token instead of reusing the previous move.
        square = square_map.get(move[:2])
        word = move if square is None else square + move[2:]
        word = word.replace("竜", "龍").replace("成銀", "全").replace("成桂", "圭").replace("成香", "杏")
        movelist.append(word)
    return movelist
def make_triplets(df, column):
    """Build (anchor, positive, negative) kif triplets, one per row.

    Each row of ``df`` serves as the anchor once. Among the rows whose
    ``kif`` differs from the anchor's:

    * the positive is one randomly sampled row with the same ``column``
      value as the anchor,
    * the negative is one randomly sampled row with a different
      ``column`` value.

    Anchors without a positive or a negative candidate are skipped.

    Args:
        df: DataFrame containing a ``kif`` column and ``column``.
        column: Name of the column used to decide whether two rows match.

    Returns:
        List of tuples ``(anchor_kif, positive_kif, negative_kif,
        positive_value, negative_value)``, where the last two items are
        the ``column`` values of the sampled rows.
    """
    triplets = []
    for pos in range(len(df)):
        anchor = df.iloc[pos]
        other_kif = df["kif"] != anchor["kif"]

        # Positive candidates: same value in `column`, different kif.
        positives = df.loc[(df[column] == anchor[column]) & other_kif]
        # Negative candidates: different value in `column`, different kif.
        negatives = df.loc[(df[column] != anchor[column]) & other_kif]
        if positives.empty or negatives.empty:
            # sample(n=1) raises on an empty frame; no triplet can be built.
            continue

        positive = positives.sample(n=1).iloc[0]
        negative = negatives.sample(n=1).iloc[0]
        triplets.append((
            anchor["kif"],
            positive["kif"],
            negative["kif"],
            positive[column],
            negative[column],
        ))
    # The list used to be built and then discarded; hand it to the caller.
    return triplets
def add_symbol(df,column):
    """Prefix moves in ``df[column]`` with alternating side-to-move marks.

    Each record is scanned character by character. Whenever a character is
    an ASCII digit or "同", the current mark is inserted in front of it and
    the mark is toggled between "▲" and "△". All other characters are
    copied unchanged. Every digit triggers a mark, so the records should
    contain digits only at the start of a move.

    Every record starts with "▲". The mark is reset for each row so that
    one record cannot flip the marks of the next one.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the column holding the move strings.

    Returns:
        The same ``df`` object with ``column`` replaced by the marked
        strings.
    """
    move_starts = frozenset("0123456789同")

    marked_records = []
    for record in df[column]:
        teban = "▲"
        pieces = []
        for ch in record:
            if ch in move_starts:
                pieces.append(teban + ch)
                teban = "△" if teban == "▲" else "▲"
            else:
                pieces.append(ch)
        marked_records.append("".join(pieces))

    # Assign the column as a whole; df[column].iloc[i] = ... is chained
    # assignment and does not update df under pandas Copy-on-Write.
    df[column] = marked_records
    return df