import re

# Shape of an HKID once parentheses are stripped: an optional first letter, a
# mandatory letter, six digits and a check character (0-9 or 'A').
_HKID_PATTERN = re.compile('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$')

# Positional weights for the nine HKID slots (the first slot may be empty).
_HKID_WEIGHTS = [9, 8, 7, 6, 5, 4, 3, 2, 1]

# A character's value is its index in this list: '0'-'9' -> 0-9,
# 'A'-'Z' -> 10-35, and None (absent first letter) -> 36.
_HKID_VALUES = list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ') + [None]

# Issue date once its wrappers are stripped: two digits, a dash, two digits.
_ISSUE_DATE_PATTERN = re.compile(r'[0-9]{2}-[0-9]{2}')


def check_integer(string: str) -> bool:
    """Return True if *string* contains at least one digit character.

    Despite the name, the string does not have to be made up entirely of
    digits; a single digit anywhere is enough.
    """
    return any(char.isdigit() for char in string)


def check_alpha(string: str) -> bool:
    """Return True if *string* holds only ASCII letters and spaces.

    An empty string is accepted because it contains no offending character.
    """
    return all(
        char == ' ' or 'a' <= char <= 'z' or 'A' <= char <= 'Z'
        for char in string
    )


def is_chinese_name(text: str) -> bool:
    """Heuristically decide whether *text* looks like a name line.

    The text is accepted when it is at most 40 characters long and its first
    eight characters contain at least three ASCII uppercase letters, two ASCII
    lowercase letters and one space (for example ``'TAMKing Man'``).
    """
    if len(text) > 40:
        return False

    # The counts can only grow as a prefix gets longer, so testing the longest
    # prefix (eight characters) covers every shorter prefix as well.
    head = text[:8]
    upper_count = sum('A' <= char <= 'Z' for char in head)
    lower_count = sum('a' <= char <= 'z' for char in head)
    space_count = head.count(' ')
    return upper_count >= 3 and lower_count >= 2 and space_count >= 1


def seperate_name(text: str) -> str:
    """Return *text* with every space removed, converted to lower case.

    NOTE: an earlier attempt to split the name into space-separated words was
    disabled in the original code; only the normalisation remains.
    """
    return text.replace(' ', '').lower()


def _match_hkid(hkid: str):
    """Strip parentheses from *hkid* and match it against the HKID pattern."""
    return _HKID_PATTERN.match(hkid.replace('(', '').replace(')', ''))


def validate_hkid(hkid: str) -> bool:
    """Return True if *hkid* is well formed and its check character is valid.

    Parentheses are ignored, so ``'G123456(A)'`` and ``'G123456A'`` are
    treated alike. Each of the nine slots (optional letter, letter, six
    digits, check character) is converted to a number, multiplied by its
    weight (9 down to 1) and summed; the HKID is valid when the sum is
    divisible by 11.
    """
    match = _match_hkid(hkid)
    if not match:
        return False

    prefix, letter, digits, check = match.groups()
    # ``prefix`` is None for single-letter HKIDs; None maps to the value 36.
    slots = [prefix, letter, *digits, check]
    total = sum(
        _HKID_VALUES.index(slot) * weight
        for slot, weight in zip(slots, _HKID_WEIGHTS)
    )
    return total % 11 == 0


def format_HKID(hkid: str) -> str:
    """Return *hkid* in canonical form with the check character in parentheses.

    Works for both one-letter and two-letter prefixes, e.g.
    ``'G123456A' -> 'G123456(A)'`` and ``'AB9876543' -> 'AB987654(3)'``.
    Parentheses already present in the input are ignored.

    Raises:
        ValueError: if *hkid* does not have the shape of an HKID.
    """
    match = _match_hkid(hkid)
    if match is None:
        raise ValueError(f'not a well-formed HKID: {hkid!r}')

    prefix, letter, digits, check = match.groups()
    return f"{prefix or ''}{letter}{digits}({check})"


def format_issuedate(issuedate: str) -> str:
    """Return *issuedate* with every '(', ')' and 'C' character removed."""
    return issuedate.replace('(', '').replace(')', '').replace('C', '')


def is_string_integer(string: str) -> bool:
    """Return True if ``int(string)`` succeeds, False on ValueError."""
    try:
        int(string)
    except ValueError:
        return False
    return True


def check_issuedate(text: str) -> bool:
    """Return True if *text* is an issue date such as ``'(06-96)'``.

    The date itself must be two digits, a dash and two digits. It may be
    wrapped in parentheses; a leading 'C' is tolerated in place of the
    opening parenthesis (presumably a misread '(' -- confirm with the caller).
    """
    # Five characters for the bare date plus at most two wrapper characters.
    if len(text) < 5 or len(text) > 7:
        return False

    if text.startswith('('):
        text = text.replace('(', '')
    elif text.startswith('C'):
        text = text.replace('C', '')
    if text.endswith(')'):
        text = text.replace(')', '')

    # Match the exact shape so that signs, underscores, surrounding spaces or
    # extra dashes (all tolerated by int()) are rejected.
    return _ISSUE_DATE_PATTERN.fullmatch(text) is not None


def print_info(name, valid_hkid, hkid, issuedate):
    """Print the name, the HKID with its validity, and the date of issue."""
    print(f'Name: {name}')
    print(f'HKID: {hkid} and validity: {valid_hkid}')
    print(f'Date of issue: {issuedate}')


def is_comma_present(string: str) -> bool:
    """Return True if *string* contains a comma."""
    return ',' in string


def longest_common_subsequence(s1: str, s2: str) -> str:
    """Return a longest common subsequence of *s1* and *s2* as a string.

    Uses the classic dynamic-programming table followed by a backtrack from
    the bottom-right corner. When several subsequences share the maximal
    length, the one found by that backtrack is returned.
    """
    m, n = len(s1), len(s2)

    # dp[i][j] is the LCS length of s1[:i] and s2[:j].
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i - 1] == s2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

    # Walk back through the table, collecting matched characters in reverse.
    lcs = []
    i, j = m, n
    while i > 0 and j > 0:
        if s1[i - 1] == s2[j - 1]:
            lcs.append(s1[i - 1])
            i -= 1
            j -= 1
        elif dp[i - 1][j] > dp[i][j - 1]:
            i -= 1
        else:
            j -= 1

    lcs.reverse()
    return ''.join(lcs)


def combine_info(info1, info2):
    """Merge two ``[name, validity, hkid, issuedate]`` readings into one.

    Both inputs are printed first. The result always has four elements:

    * name -- the shared name, the non-empty one if the other is empty,
      otherwise the longest common subsequence of the two names;
    * validity / hkid -- if exactly one reading is flagged ``'True'`` its HKID
      is used; if both are and the HKIDs agree, that HKID is used; if both are
      but the HKIDs differ, ``'False'`` and ``'Suspicous HKID'`` are reported;
      if neither is, ``'False'`` is reported with the first non-empty HKID
      reading;
    * issuedate -- the shared date, or ``'Unmatched issuedate'``.

    Validity flags are compared through ``str()``, so both the strings
    ``'True'``/``'False'`` and real booleans are understood.
    """
    print(info1)
    print(info2)

    name1, name2 = info1[0], info2[0]
    if name1 == name2:
        name = name1  # Same in both models: keep as-is.
    elif name1 == '':
        name = name2
    elif name2 == '':
        name = name1
    else:
        name = longest_common_subsequence(name1, name2)

    valid1 = str(info1[1]) == 'True'
    valid2 = str(info2[1]) == 'True'
    if valid1 and valid2:
        if info1[2] == info2[2]:
            validity, hkid = 'True', info1[2]
        else:
            validity, hkid = 'False', 'Suspicous HKID'
    elif valid1:
        validity, hkid = 'True', info1[2]
    elif valid2:
        validity, hkid = 'True', info2[2]
    else:
        # Neither reading validated; still emit both fields so that the result
        # keeps its four-element shape (print_info(*result) relies on it).
        validity, hkid = 'False', info1[2] or info2[2]

    if info1[3] == info2[3]:
        issuedate = info1[3]
    else:
        issuedate = 'Unmatched issuedate'

    return [name, validity, hkid, issuedate]


# Example usage (kept commented out):
#
# info1 = ['', 'True', 'Z683365(5)', '06-96']
# info2 = ['lok wing', 'False', 'Z68336505)', '06-96']
# info = combine_info(info1, info2)
# print_info(*info)
#
# text = 'TAMKing Man'
# if is_comma_present(text):
#     text = text.replace(',', '')
# if not check_integer(text):
#     if check_alpha(text) and is_chinese_name(text):
#         name = seperate_name(text)