Spaces:

nkasmanoff
/

SearchingFace

Runtime error

App Files Files Community

SearchingFace / helpers.py

nkasmanoff

Update helpers.py

fb6da00 over 1 year ago

raw

history blame contribute delete

1.98 kB

	import requests

	def clean_up_tags(tags_list):
	    """Flatten a list of tags into one comma-separated string.

	    A tag that contains a colon is reduced to the segment between its
	    first and second colon (e.g. "license:mit" becomes "mit" and
	    "a:b:c" becomes "b"); a tag without a colon is kept as-is.

	    Args:
	        tags_list: Iterable of tag strings.

	    Returns:
	        str: The cleaned tags joined with ", " (empty string when there
	        are no tags).
	    """
	    return ", ".join(
	        label.split(':')[1] if ':' in label else label
	        for label in tags_list
	    )



	def check_api_url(url):
	    """Ensure "/api" sits between ".co" and "/datasets" in a URL.

	    The URL is cut at the first ".co" and then at the first "/datasets"
	    that follows it. If the text between those two markers does not
	    already contain "/api", "/api" is inserted right after ".co".

	    A URL that lacks either marker is returned unchanged instead of
	    raising IndexError, and only the first occurrence of each marker is
	    used, so dataset names that themselves contain ".co" or "/datasets"
	    are no longer truncated.

	    Args:
	        url (str): A URL string

	    Returns:
	        str: A URL string with "api" inserted if necessary
	    """
	    # partition() splits on the first occurrence only and never raises,
	    # unlike indexing into the result of split().
	    host_part, co_marker, remainder = url.partition(".co")
	    if not co_marker:
	        return url
	    between, datasets_marker, tail = remainder.partition("/datasets")
	    if not datasets_marker:
	        return url
	    # Already an API URL: leave it untouched.
	    if "/api" in between:
	        return url
	    return host_part + co_marker + "/api" + between + datasets_marker + tail



	def get_dataset_metadata(dataset_url):
	    """Fetch selected metadata fields for a dataset URL.

	    The URL is first passed through check_api_url, then requested with
	    an HTTP GET. On a 200 response the JSON body is filtered down to the
	    'id', 'description' and 'tags' keys that are present.

	    Args:
	        dataset_url (str): A dataset URL string

	    Returns:
	        dict: The subset of 'id', 'description' and 'tags' found in the
	        response, or an empty dict when the status code is not 200.
	    """
	    api_response = requests.get(check_api_url(dataset_url))
	    if api_response.status_code != 200:
	        return {}
	    payload = api_response.json()
	    wanted_fields = ('id', 'description', 'tags')
	    return {
	        field: payload[field]
	        for field in wanted_fields
	        if field in payload
	    }


	def get_dataset_readme(dataset_url):
	    """Fetch a dataset's README text together with its id.

	    The README is requested from dataset_url + '/raw/main/README.md'.
	    Only when that request returns 200 is the metadata URL (dataset_url
	    passed through check_api_url) requested to look up the 'id'.

	    Args:
	        dataset_url (str): A dataset URL string

	    Returns:
	        dict: {'id': ..., 'README': ...} on success; an empty dict when
	        the README request is not 200, the metadata request is not 200,
	        or the metadata carries no 'id'.
	    """
	    readme_response = requests.get(dataset_url + '/raw/main/README.md')
	    if readme_response.status_code != 200:
	        return {}
	    # Ask for metadata only once the README is known to exist, so a
	    # missing README does not cost a second request.
	    metadata_response = requests.get(check_api_url(dataset_url))
	    if metadata_response.status_code != 200:
	        # A failed metadata call has no 'id' key; report "nothing
	        # retrieved" rather than crashing on the lookup.
	        return {}
	    dataset_id = metadata_response.json().get('id')
	    if dataset_id is None:
	        return {}
	    return {'id': dataset_id, 'README': readme_response.text}