File size: 6,354 Bytes
1f72938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import re

def check_integer(string):
    """Return True if *string* contains at least one digit character.

    An empty string yields False.
    """
    return any(symbol.isdigit() for symbol in string)

def check_alpha(string):
    """Return True if every character is an ASCII letter or a space.

    An empty string yields True because there is no offending character.
    """
    return all(
        'a' <= symbol <= 'z' or 'A' <= symbol <= 'Z' or symbol == ' '
        for symbol in string
    )

def is_chinese_name(text):
    """Heuristically decide whether *text* looks like a name line.

    Returns True when *text* is at most 40 characters long and its first
    eight characters contain at least three ASCII uppercase letters, at
    least two ASCII lowercase letters and at least one space
    (e.g. 'TAMKing Man').
    """
    if len(text) > 40:
        return False

    # Every shorter prefix is contained in the 8-character one, and the
    # counts can only grow with the prefix, so checking the longest prefix
    # is enough.
    head = text[:8]
    capitals = sum(1 for symbol in head if 'A' <= symbol <= 'Z')
    smalls = sum(1 for symbol in head if 'a' <= symbol <= 'z')
    blanks = sum(1 for symbol in head if symbol == ' ')

    return capitals >= 3 and smalls >= 2 and blanks >= 1

def seperate_name(text):
    """Normalise a name by removing every space and lower-casing it.

    Despite its name, this function does not split the name into words;
    it returns a single compact string (e.g. 'TAM King Man' ->
    'tamkingman').

    Args:
        text: The raw name string.

    Returns:
        *text* without space characters, converted to lower case.
    """
    return text.replace(' ', '').lower()

def validate_hkid(hkid):
    """Check an HKID string against its weighted mod-11 checksum.

    Parentheses are ignored, so 'A123456(3)' and 'A1234563' are treated
    alike. The string must consist of one or two uppercase letters, six
    digits and a final check character ('0'-'9' or 'A').

    Returns:
        True if the string has that shape and the weighted sum of its
        character values is divisible by 11, otherwise False.
    """
    stripped = hkid.replace('(', '').replace(')', '')

    parsed = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', stripped)
    if parsed is None:
        return False

    # Character values: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, and a missing
    # first letter (None) -> 36.
    symbols = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    value_of = {symbol: position for position, symbol in enumerate(symbols)}
    value_of[None] = len(symbols)

    first, second, digits, check = parsed.groups()
    characters = [first, second, *digits, check]

    # Positions are weighted 9 down to 1, left to right.
    total = 0
    for factor, character in zip(range(9, 0, -1), characters):
        total += factor * value_of[character]

    return total % 11 == 0

def format_HKID(hkid):
    """Return *hkid* in canonical form with the check character in parentheses.

    Existing parentheses are discarded first, so both 'A1234563' and
    'A123456(3)' become 'A123456(3)'. HKIDs with a two-letter prefix are
    handled the same way ('AB9876543' -> 'AB987654(3)').

    Args:
        hkid: One or two uppercase letters, six digits and a check
            character ('0'-'9' or 'A'), with or without parentheses.

    Returns:
        The formatted HKID string.

    Raises:
        ValueError: If *hkid* does not have the expected shape.
    """
    hkid = hkid.replace('(', '').replace(')', '')

    match = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', hkid)
    if match is None:
        raise ValueError(f'not a recognisable HKID: {hkid!r}')

    first_letter, second_letter, digits, check = match.groups()
    # The first letter is optional; the check character is always the last
    # one, whatever the prefix length.
    return f"{first_letter or ''}{second_letter}{digits}({check})"

def format_issuedate(issuedate):
    """Strip every '(', ')' and 'C' character from *issuedate*."""
    unwanted = str.maketrans('', '', '()C')
    return issuedate.translate(unwanted)

def is_string_integer(string):
    """Return True if ``int(string)`` succeeds, False if it raises ValueError.

    Anything ``int()`` accepts counts, including a leading sign and
    surrounding whitespace.
    """
    try:
        int(string)
    except ValueError:
        # Not parseable as an integer literal.
        return False
    else:
        return True

def check_issuedate(text):
    """Return True if *text* looks like an issue date such as '(06-96)'.

    Accepted input is 5 to 7 characters long. A leading '(' or 'C' and a
    trailing ')' are tolerated and removed; what remains must be exactly
    two ASCII digits, a hyphen, and two ASCII digits.

    Args:
        text: The candidate string.

    Returns:
        True when the cleaned text has the form 'NN-NN', otherwise False.
    """
    # Reject anything outside the 5..7 length window up front (this also
    # covers the empty string, so the indexing below is safe).
    if len(text) < 5 or len(text) > 7:
        return False

    if text[0] == '(':
        text = text.replace('(', '')
    elif text[0] == 'C':
        text = text.replace('C', '')
    if text.endswith(')'):
        text = text.replace(')', '')

    # Require real digits; int() would also accept signs, whitespace and
    # underscores such as '+6-96'.
    return re.fullmatch('[0-9]{2}-[0-9]{2}', text) is not None

def print_info(name, valid_hkid, hkid, issuedate):
    """Print the name, the HKID with its validity flag, and the issue date."""
    report = (
        f'Name: {name}',
        f'HKID: {hkid} and validity: {valid_hkid}',
        f'Date of issue: {issuedate}',
    )
    for line in report:
        print(line)

def is_comma_present(string):
    """Return True if *string* contains at least one comma."""
    has_comma = ',' in string
    return has_comma

def longest_common_subsequence(s1, s2):
    """Return one longest common subsequence of *s1* and *s2* as a string.

    Uses the classic dynamic-programming table of subsequence lengths and
    then walks it backwards from the bottom-right corner. When moving up
    or left would be equally good, the walk moves left (drops a character
    of *s2*).
    """
    rows, cols = len(s1), len(s2)

    # lengths[r][c] = LCS length of s1[:r] and s2[:c]
    lengths = [[0] * (cols + 1) for _ in range(rows + 1)]
    for r, left_char in enumerate(s1, start=1):
        above = lengths[r - 1]
        current = lengths[r]
        for c, right_char in enumerate(s2, start=1):
            if left_char == right_char:
                current[c] = above[c - 1] + 1
            else:
                current[c] = max(above[c], current[c - 1])

    # Walk back through the table, collecting matched characters in
    # reverse order.
    collected = []
    r, c = rows, cols
    while r > 0 and c > 0:
        if s1[r - 1] == s2[c - 1]:
            collected.append(s1[r - 1])
            r -= 1
            c -= 1
        elif lengths[r][c - 1] >= lengths[r - 1][c]:
            c -= 1
        else:
            r -= 1

    return ''.join(reversed(collected))

def combine_info(info1, info2):
    """Merge two extraction results into a single one.

    Each argument is indexed as ``[name, validity, hkid, issuedate]``,
    where validity is the string 'True' or 'False'.

    Merging rules:
      * name: taken as-is when both agree or when one side is empty;
        otherwise the longest common subsequence of the two names.
      * HKID: when exactly one side is 'True', that side's validity and
        HKID are used; when both are 'True' and the HKIDs are equal, that
        HKID is used; in every other case (including two 'True' HKIDs
        that differ) the result is 'False' / 'Suspicous HKID'.
      * issue date: kept when both agree, else 'Unmatched issuedate'.

    Returns:
        A four-item list ``[name, validity, hkid, issuedate]``.
    """
    # Debug output of the raw inputs, kept from the original implementation.
    print(info1)
    print(info2)

    name1, name2 = info1[0], info2[0]
    if name1 == name2 or name2 == '':
        name = name1
    elif name1 == '':
        name = name2
    else:
        name = longest_common_subsequence(name1, name2)

    valid1, valid2 = info1[1], info2[1]
    if valid1 == 'True' and valid2 == 'False':
        validity, hkid = valid1, info1[2]
    elif valid1 == 'False' and valid2 == 'True':
        validity, hkid = valid2, info2[2]
    elif valid1 == 'True' and valid2 == 'True' and info1[2] == info2[2]:
        validity, hkid = valid1, info1[2]
    else:
        # Covers both-invalid, unexpected flags, and two "valid" HKIDs that
        # disagree. The last case used to append nothing, which produced a
        # two-item result.
        validity, hkid = 'False', 'Suspicous HKID'

    if info1[3] == info2[3]:
        issuedate = info1[3]
    else:
        issuedate = 'Unmatched issuedate'

    return [name, validity, hkid, issuedate]



# info1 = ['', 'True', 'Z683365(5)', '06-96']
# info2 = ['lok wing', 'False', 'Z68336505)', '06-96']
# info = combine_info(info1, info2)
# print_info(*info)


# text = 'TAMKing Man'
# if is_comma_present(text):
#             text = text.replace(',', '')
#             if not check_integer(text):
#                 if check_alpha(text) and is_chinese_name(text):
#                     name = seperate_name(text)