baqu2213's picture
Upload 15 files
f5cf8c0
raw
history blame
6.02 kB
import customtkinter
import pandas as pd
# Function that keeps only the rows containing every keyword.
def filter_rows_containing_all_keywords(df, keywords):
    """Return the rows of ``df`` in which every keyword is found.

    A keyword counts as found in a row when ``Series.str.contains`` reports a
    hit in at least one of the ``copyright``, ``character``, ``artist``,
    ``meta`` or ``general`` columns.  Columns whose dtype is not ``object``
    are skipped and missing values never match (``na=False``).

    Args:
        df: DataFrame providing the five tag columns.
        keywords: Iterable of patterns passed to ``Series.str.contains``.

    Returns:
        The subset of ``df`` selected by the combined boolean mask.  With no
        keywords every row is kept.
    """
    tag_columns = ('copyright', 'character', 'artist', 'meta', 'general')

    def rows_with(term):
        # OR together the per-column hits for one keyword.
        hits = pd.Series([False] * len(df), index=df.index)
        for name in tag_columns:
            if df[name].dtype != 'object':
                continue
            hits = hits | df[name].str.contains(term, na=False)
        return hits

    # A row survives only if each keyword was found somewhere in it.
    keep = pd.Series([True] * len(df), index=df.index)
    for term in keywords:
        keep = keep & rows_with(term)
    return df[keep]
def filter_rows_not_containing_all_keywords(df, keywords):
    """Return the rows of ``df`` in which none of the keywords is found.

    For each keyword, a row is rejected when ``Series.str.contains`` reports
    a hit in at least one of the ``copyright``, ``character``, ``artist``,
    ``meta`` or ``general`` columns.  Columns whose dtype is not ``object``
    are skipped and missing values never match (``na=False``).

    Args:
        df: DataFrame providing the five tag columns.
        keywords: Iterable of patterns passed to ``Series.str.contains``.

    Returns:
        The subset of ``df`` whose rows matched no keyword at all.  With no
        keywords every row is kept.
    """
    tag_columns = ('copyright', 'character', 'artist', 'meta', 'general')

    def rows_with(term):
        # OR together the per-column hits for one keyword.
        hits = pd.Series([False] * len(df), index=df.index)
        for name in tag_columns:
            if df[name].dtype != 'object':
                continue
            hits = hits | df[name].str.contains(term, na=False)
        return hits

    # Invert each keyword's hit mask so that any single hit removes the row.
    keep = pd.Series([True] * len(df), index=df.index)
    for term in keywords:
        keep = keep & ~rows_with(term)
    return df[keep]
def process_asterisk_group(df, asterisk_group):
    """Keep the rows whose joined tag string matches every ``*`` keyword.

    Each keyword has its leading ``*`` characters removed and a ``,``
    appended.  For every row a search string is built as a single space,
    the ``copyright``, ``character``, ``artist``, ``meta`` and ``general``
    values (converted with ``str``) joined by ``', '``, and a trailing
    ``,``.  A row is kept only if ``Series.str.contains`` finds every
    keyword in that string, so a keyword has to end exactly where a tag
    ends.

    The search strings live in a local Series; ``df`` itself is never
    modified, so no temporary ``search_string`` column leaks into the
    caller's frame.

    Args:
        df: DataFrame providing the five tag columns.
        asterisk_group: Keywords, each normally prefixed with ``*``.

    Returns:
        The matching subset of ``df``; ``df`` itself when
        ``asterisk_group`` is empty.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']
    patterns = [keyword.lstrip('*') + ',' for keyword in asterisk_group]
    if not patterns:
        return df

    # One search string per row, e.g. " fate, saber, artist, meta, 1girl,".
    search_strings = pd.Series(
        [
            ' ' + ', '.join(map(str, row)) + ','
            for row in df[tag_columns].to_numpy()
        ],
        index=df.index,
        dtype=object,
    )

    keep = pd.Series(True, index=df.index, dtype=bool)
    for pattern in patterns:
        keep = keep & search_strings.str.contains(pattern, na=False)
    return df[keep]
def process_perfect_negative_group(df, perfect_negative_group):
    """Drop the rows whose joined tag string matches any ``~`` keyword.

    Each keyword has its leading ``~`` characters removed and a ``,``
    appended.  For every row a search string is built as a single space,
    the ``copyright``, ``character``, ``artist``, ``meta`` and ``general``
    values (converted with ``str``) joined by ``', '``, and a trailing
    ``,``.  A row is removed as soon as ``Series.str.contains`` finds one
    of the keywords in that string.

    The search strings live in a local Series; ``df`` itself is never
    modified, so no temporary ``search_string`` column leaks into the
    caller's frame.

    Args:
        df: DataFrame providing the five tag columns.
        perfect_negative_group: Keywords, each normally prefixed with ``~``.

    Returns:
        The subset of ``df`` that matched none of the keywords; ``df``
        itself when ``perfect_negative_group`` is empty.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']
    patterns = [keyword.lstrip('~') + ',' for keyword in perfect_negative_group]
    if not patterns:
        return df

    # One search string per row, e.g. " fate, saber, artist, meta, 1girl,".
    search_strings = pd.Series(
        [
            ' ' + ', '.join(map(str, row)) + ','
            for row in df[tag_columns].to_numpy()
        ],
        index=df.index,
        dtype=object,
    )

    # Start from "keep everything" and knock out rows hit by any keyword.
    keep = pd.Series(True, index=df.index, dtype=bool)
    for pattern in patterns:
        keep = keep & ~search_strings.str.contains(pattern, na=False)
    return df[keep]
def search(df, search_request, exclude_request, E, N, S, G):
    """Filter ``df`` by rating flags, then by include and exclude requests.

    Both requests are comma-separated lists.  Items are stripped of
    surrounding whitespace and empty items are discarded *before* they are
    grouped, so a stray or trailing comma (``"1girl, "``) can no longer
    inject an empty keyword; an empty exclude keyword matches every row and
    used to wipe the whole result.

    Include items are applied in the order normal -> OR group -> ``*``:
      * ``{a|b}``  - OR group, handled inline below.
      * ``*tag``   - passed to ``process_asterisk_group``.
      * other      - passed to ``filter_rows_containing_all_keywords``.
    Exclude items:
      * ``~tag``   - passed to ``process_perfect_negative_group``.
      * other      - passed to ``filter_rows_not_containing_all_keywords``.

    Keywords reach ``Series.str.contains`` unchanged, whose default is to
    treat the pattern as a regular expression.

    Args:
        df: DataFrame with a ``rating`` column and the five tag columns.
        search_request: Comma-separated include request.
        exclude_request: Comma-separated exclude request.
        E: When equal to 0, rows with rating ``'e'`` are removed.
        N: When equal to 0, rows with rating ``'q'`` are removed.
        S: When equal to 0, rows with rating ``'s'`` are removed.
        G: When equal to 0, rows with rating ``'g'`` are removed.

    Returns:
        The filtered DataFrame, or ``None`` as soon as any stage leaves no
        rows.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']

    # Rating filter: a flag of 0 removes the corresponding rating.
    for flag, rating in ((E, 'e'), (N, 'q'), (S, 's'), (G, 'g')):
        if flag == 0:
            df = df[df['rating'] != rating]
    if len(df) == 0:
        return None

    # Parse both requests up front, discarding empty items before grouping.
    include_items = [item.strip() for item in search_request.split(',')]
    include_items = [item for item in include_items if item]
    curly_brace_group = [
        item for item in include_items
        if item.startswith('{') and item.endswith('}')
    ]
    asterisk_group = [item for item in include_items if item.startswith('*')]
    normal_group = [
        item for item in include_items
        if item not in curly_brace_group and item not in asterisk_group
    ]

    exclude_items = [item.strip() for item in exclude_request.split(',')]
    exclude_items = [item for item in exclude_items if item]
    perfect_negative_group = [item for item in exclude_items if item.startswith('~')]
    negative_group = [item for item in exclude_items if not item.startswith('~')]

    # Positive: plain keywords, every one of which must be present.
    if normal_group:
        df = filter_rows_containing_all_keywords(df, normal_group)
        if len(df) == 0:
            return None

    # Positive: OR groups, applied one after another.
    for group in curly_brace_group:
        alternatives = [alt.strip() for alt in group[1:-1].split('|')]
        matched_frames = []
        for alternative in alternatives:
            if alternative.startswith('*'):
                alternative = alternative[1:]
            # NOTE(review): only the first column that yields any hit is
            # used for an alternative, and a row matched by several
            # alternatives appears once per match in the result.  Kept
            # as-is; confirm whether this is intended.
            for column in tag_columns:
                matched_rows = df[df[column].str.contains(alternative, na=False)]
                if not matched_rows.empty:
                    matched_frames.append(matched_rows)
                    break
        if not matched_frames:
            return None
        df = pd.concat(matched_frames)
        if len(df) == 0:
            return None

    # Positive: perfect-match ('*') keywords.
    if asterisk_group:
        df = process_asterisk_group(df, asterisk_group)
        if len(df) == 0:
            return None

    # Exclude: plain keywords first, then perfect-match ('~') keywords.
    if negative_group:
        df = filter_rows_not_containing_all_keywords(df, negative_group)
        if len(df) == 0:
            return None
    if perfect_negative_group:
        df = process_perfect_negative_group(df, perfect_negative_group)
        if len(df) == 0:
            return None

    return df