File size: 6,020 Bytes
f5cf8c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import customtkinter
import pandas as pd

# Keep only the rows that contain every one of the given keywords.
def filter_rows_containing_all_keywords(df, keywords):
    """Return the rows of *df* that contain every keyword.

    A keyword counts as present in a row when it occurs as a literal
    substring in at least one of the text columns ``copyright``,
    ``character``, ``artist``, ``meta`` or ``general``. Missing values never
    match. An empty *keywords* list keeps every row.

    Args:
        df: DataFrame holding the five tag columns listed above.
        keywords: Iterable of substrings that must all be present.

    Returns:
        A DataFrame with the matching rows, in their original order.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']

    # Start from "every row passes" and narrow it down keyword by keyword.
    final_mask = pd.Series(True, index=df.index, dtype=bool)

    for keyword in keywords:
        keyword_mask = pd.Series(False, index=df.index, dtype=bool)

        for column in tag_columns:
            # Only text columns can be searched; this accepts both the
            # classic object dtype and pandas' dedicated string dtypes.
            if not pd.api.types.is_string_dtype(df[column].dtype):
                continue
            # regex=False: tags such as "fate_(series)" or ":)" contain regex
            # metacharacters and must be compared literally.
            hits = df[column].str.contains(keyword, regex=False, na=False)
            keyword_mask = keyword_mask | hits.astype(bool)

        final_mask = final_mask & keyword_mask

    return df[final_mask]

def filter_rows_not_containing_all_keywords(df, keywords):
    """Return the rows of *df* that contain none of the keywords.

    Despite the name, a row is dropped as soon as ANY single keyword occurs
    as a literal substring in one of the text columns ``copyright``,
    ``character``, ``artist``, ``meta`` or ``general``. Missing values never
    match. An empty *keywords* list keeps every row.

    Args:
        df: DataFrame holding the five tag columns listed above.
        keywords: Iterable of substrings that must be absent.

    Returns:
        A DataFrame with the surviving rows, in their original order.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']

    # Start from "every row survives" and knock rows out keyword by keyword.
    final_mask = pd.Series(True, index=df.index, dtype=bool)

    for keyword in keywords:
        keyword_mask = pd.Series(False, index=df.index, dtype=bool)

        for column in tag_columns:
            # Only text columns can be searched; this accepts both the
            # classic object dtype and pandas' dedicated string dtypes.
            if not pd.api.types.is_string_dtype(df[column].dtype):
                continue
            # regex=False: tags such as "fate_(series)" or ":)" contain regex
            # metacharacters and must be compared literally.
            hits = df[column].str.contains(keyword, regex=False, na=False)
            keyword_mask = keyword_mask | hits.astype(bool)

        # A row that contains this keyword anywhere is excluded.
        final_mask = final_mask & ~keyword_mask

    return df[final_mask]

def process_asterisk_group(df, asterisk_group):
    """Keep the rows that match every ``*``-prefixed keyword.

    For each row the five tag columns (``copyright``, ``character``,
    ``artist``, ``meta``, ``general``) are joined into one string of the form
    ``" a, b, c, d, e,"``. Each keyword has its leading ``*`` characters
    removed and a ``,`` appended, and the row is kept only when every such
    ``"keyword,"`` occurs literally in that string.

    The input DataFrame is not modified: the joined search text lives in a
    local Series rather than in a temporary column on *df*.

    Args:
        df: DataFrame holding the five tag columns listed above.
        asterisk_group: Keywords, each optionally prefixed with ``*``.

    Returns:
        A DataFrame with the matching rows and the same columns as *df*.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']

    # The trailing ',' anchors the END of a tag, so "girl," cannot match "girls,".
    # NOTE(review): the start of the tag is not anchored, so "girl," still
    # matches inside "1girl," - confirm whether that is intended.
    terms = [keyword.lstrip('*') + ',' for keyword in asterisk_group]

    # str() is applied to every cell, so non-text values (including missing
    # ones) take part in the search as their text representation.
    haystack = pd.Series(
        [
            ' ' + ', '.join(map(str, row)) + ','
            for row in df[tag_columns].itertuples(index=False, name=None)
        ],
        index=df.index,
        dtype=object,
    )

    keep = pd.Series(True, index=df.index, dtype=bool)
    for term in terms:
        # regex=False: tags may contain characters such as '(' or '+'.
        hits = haystack.str.contains(term, regex=False, na=False)
        keep = keep & hits.astype(bool)

    return df[keep]

def process_perfect_negative_group(df, perfect_negative_group):
    """Drop the rows that match any ``~``-prefixed keyword.

    For each row the five tag columns (``copyright``, ``character``,
    ``artist``, ``meta``, ``general``) are joined into one string of the form
    ``" a, b, c, d, e,"``. Each keyword has its leading ``~`` characters
    removed and a ``,`` appended, and the row is dropped when any such
    ``"keyword,"`` occurs literally in that string.

    The input DataFrame is not modified: the joined search text lives in a
    local Series rather than in a temporary column on *df*.

    Args:
        df: DataFrame holding the five tag columns listed above.
        perfect_negative_group: Keywords, each optionally prefixed with ``~``.

    Returns:
        A DataFrame with the surviving rows and the same columns as *df*.
    """
    tag_columns = ['copyright', 'character', 'artist', 'meta', 'general']

    # The trailing ',' anchors the END of a tag, so "girl," cannot match "girls,".
    # NOTE(review): the start of the tag is not anchored, so "girl," still
    # matches inside "1girl," - confirm whether that is intended.
    terms = [keyword.lstrip('~') + ',' for keyword in perfect_negative_group]

    # str() is applied to every cell, so non-text values (including missing
    # ones) take part in the search as their text representation.
    haystack = pd.Series(
        [
            ' ' + ', '.join(map(str, row)) + ','
            for row in df[tag_columns].itertuples(index=False, name=None)
        ],
        index=df.index,
        dtype=object,
    )

    keep = pd.Series(True, index=df.index, dtype=bool)
    for term in terms:
        # regex=False: tags may contain characters such as '(' or '+'.
        hits = haystack.str.contains(term, regex=False, na=False)
        keep = keep & ~hits.astype(bool)

    return df[keep]

def _or_group_mask(df, alternatives):
    """Return a boolean mask of the rows that match at least one alternative."""
    mask = pd.Series(False, index=df.index, dtype=bool)
    for term in alternatives:
        # Inside an OR group a leading '*' is simply dropped; the term is
        # still matched as a plain substring.
        if term.startswith('*'):
            term = term[1:]
        for column in ['copyright', 'character', 'artist', 'meta', 'general']:
            if not pd.api.types.is_string_dtype(df[column].dtype):
                continue
            # regex=False: tags may contain characters such as '(' or '+'.
            hits = df[column].str.contains(term, regex=False, na=False)
            mask = mask | hits.astype(bool)
    return mask


def search(df, search_request, exclude_request, E, N, S, G):
    """Filter *df* by rating flags, positive terms and excluded terms.

    Both request strings are comma-separated; surrounding whitespace is
    stripped and empty terms (e.g. from a trailing comma) are ignored.

    Positive terms in ``search_request`` are applied in this order:
      1. plain terms   - every one must occur as a substring in some tag column;
      2. ``{a|b|c}``   - OR group, at least one alternative must occur;
      3. ``*term``     - handled by ``process_asterisk_group``.

    Excluded terms in ``exclude_request``:
      1. plain terms   - rows containing any of them are removed;
      2. ``~term``     - handled by ``process_perfect_negative_group``.

    Args:
        df: DataFrame with the columns ``copyright``, ``character``,
            ``artist``, ``meta`` and ``general``; ``rating`` is required
            whenever a rating flag is 0.
        search_request: Comma-separated positive terms.
        exclude_request: Comma-separated excluded terms.
        E, N, S, G: Rating switches. A value of 0 removes rows whose
            ``rating`` is ``'e'``, ``'q'``, ``'s'`` or ``'g'`` respectively.

    Returns:
        The filtered DataFrame, or ``None`` as soon as no rows remain.
    """
    # Rating filter: collect the codes whose switch is off and drop them at once.
    excluded_ratings = [
        code
        for flag, code in ((E, 'e'), (N, 'q'), (S, 's'), (G, 'g'))
        if flag == 0
    ]
    if excluded_ratings:
        df = df[~df['rating'].isin(excluded_ratings)]
    if len(df) == 0:
        return None

    # Drop empty terms BEFORE grouping. An empty string is a substring of
    # every value, so a stray '' in the exclude list would remove all rows.
    split_requests = [
        item for item in (part.strip() for part in search_request.split(',')) if item
    ]
    negative_split_requests = [
        item for item in (part.strip() for part in exclude_request.split(',')) if item
    ]

    curly_brace_group = [
        item for item in split_requests
        if item.startswith('{') and item.endswith('}')
    ]
    asterisk_group = [item for item in split_requests if item.startswith('*')]
    normal_group = [
        item for item in split_requests
        if item not in curly_brace_group and item not in asterisk_group
    ]

    perfect_negative_group = [
        item for item in negative_split_requests if item.startswith('~')
    ]
    negative_group = [
        item for item in negative_split_requests
        if item not in perfect_negative_group
    ]

    # Positive terms: plain -> OR groups -> '*' terms.
    if normal_group:
        df = filter_rows_containing_all_keywords(df, normal_group)
        if len(df) == 0:
            return None

    for group in curly_brace_group:
        alternatives = [item.strip() for item in group[1:-1].split('|')]
        # A boolean mask keeps each row at most once and preserves row order,
        # even when a row matches several alternatives or several columns.
        df = df[_or_group_mask(df, alternatives)]
        # Stop right away: later groups cannot add rows back.
        if len(df) == 0:
            return None

    if asterisk_group:
        df = process_asterisk_group(df, asterisk_group)
        if len(df) == 0:
            return None

    # Excluded terms: plain -> '~' terms.
    if negative_group:
        df = filter_rows_not_containing_all_keywords(df, negative_group)
        if len(df) == 0:
            return None

    if perfect_negative_group:
        df = process_perfect_negative_group(df, perfect_negative_group)
        if len(df) == 0:
            return None

    return df