Spaces:

OneFi
/

hf-similarity-check

Sleeping

Mitul Mohammad Abdullah Al Mukit

updates

e029c8d over 1 year ago

2.23 kB

	from cnocr import CnOcr
	import openai
	from dotenv import load_dotenv
	import os
	import json

	def model0(path):
	    """Extract HKID card fields from an image using OCR and a chat model.

	    Runs CnOcr (``en_PP-OCRv3`` recognition model) on ``path``, drops OCR
	    fragments whose text is only a space or a comma, and sends the
	    remaining text to ``gpt-3.5-turbo`` with a prompt asking for the
	    holder name, HKID number and date of issue. The reply is decoded
	    with ``json.loads``.

	    ``load_dotenv()`` is called first, then the OpenAI API key is read
	    from the ``data-extraction-api`` environment variable.

	    The raw OCR output, the raw model reply and the decoded result are
	    printed to stdout.

	    Args:
	        path: Image to recognise; passed unchanged to ``CnOcr.ocr``.

	    Returns:
	        The object decoded from the model's reply. The prompt asks for a
	        dictionary, but the key names are chosen by the model and are
	        not validated here.

	    Raises:
	        json.JSONDecodeError: If the model's reply is not valid JSON.
	    """
	    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
	    out = ocr.ocr(path)

	    print(out)

	    load_dotenv()
	    openai.api_key = os.environ.get("data-extraction-api")

	    # Keep every recognised text fragment except bare spaces and commas.
	    invalid_list = (' ', ',')
	    data_set_1 = [
	        item['text'] for item in out if item['text'] not in invalid_list
	    ]

	    # The prompt literals use backslash continuations, so any leading
	    # whitespace on a continuation line becomes part of the prompt. Those
	    # lines are deliberately left at their original column.
	    completion = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        temperature=0,
	        messages=[
	            {"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
	(name, HKID number, date of issue) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
	dictionary format"},
	            {"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
	You have three types of data to extract. \
	1. id card holder full name (it noramlly is a chinese name, including surname and family \
	name in English spelling, and it may be separate in different fields in the data set for surname and family name \
	sometimes) \
	2. issue date (should be a date with month and day, e.g. 19-97 is the required format, but 26-11-18 is not \
	because date of issue of have 5 characters) Only choose valid format!!! \
	3. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
	(a) @ represents any one or two capital letters of the alphabet. \
	(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
	Remember to include the check digit with () \
	Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
	        ],
	    )

	    data = completion['choices'][0]['message']['content']

	    print(data)

	    id_data = json.loads(data)

	    print(id_data)

	    # Previously the parsed result was computed and then discarded by a
	    # bare ``return``. Returning it lets callers use the extraction.
	    # NOTE(review): an earlier draft intended to return
	    # [name, valid_hkid, hkid, issuedate]; confirm the desired shape
	    # before callers start depending on specific dictionary keys.
	    return id_data

	# Run the sample extraction only when this file is executed as a script,
	# so that importing the module does not trigger OCR and an OpenAI request
	# against a hard-coded image path.
	if __name__ == "__main__":
	    model0('dontTouchMe/IMG_4499.jpg')