Desm0nt
/

TestFinetunes

Inference Endpoints

Model card Files Files and versions Community

TestFinetunes / convert.py

Desm0nt's picture

Upload convert.py

c4edf1f verified 5 months ago

history blame contribute delete

1.59 kB

	import os
	import json

	# Hard-coded dataset locations used when this file is run as a script.
	folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
	base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
	tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


	def read_text_if_exists(path):
	    """Return the text of *path* decoded as UTF-8, or None if it is missing."""
	    # An explicit encoding keeps the result independent of the platform
	    # locale (the default on Windows is a legacy code page, not UTF-8).
	    try:
	        with open(path, "r", encoding="utf-8") as f:
	            return f.read()
	    except FileNotFoundError:
	        return None


	def build_records(image_folder, tags_folder):
	    """Build one conversation record per captioned ``.jpg`` in *image_folder*.

	    For every ``<name>.jpg`` that has a sibling ``<name>.txt`` in
	    *image_folder*, a record is produced whose assistant turn is the raw
	    content of that ``.txt`` file. If *tags_folder* also contains
	    ``<name>.txt``, its stripped content is appended to the user prompt.
	    Images without a sibling ``.txt`` are skipped.

	    Args:
	        image_folder: Directory holding the ``.jpg`` files and their
	            caption ``.txt`` files.
	        tags_folder: Directory holding optional per-image tag ``.txt`` files.

	    Returns:
	        A list of dicts with keys ``"id"``, ``"image"`` and
	        ``"conversations"``; ids are consecutive decimal strings from "0".

	    Raises:
	        OSError: If *image_folder* cannot be listed or a file cannot be read.
	        UnicodeDecodeError: If a caption or tag file is not valid UTF-8.
	    """
	    records = []
	    # os.listdir() returns entries in arbitrary order; sorting makes the
	    # assigned ids reproducible from run to run.
	    for filename in sorted(os.listdir(image_folder)):
	        if not filename.endswith(".jpg"):
	            continue

	        image_name = os.path.splitext(filename)[0]
	        caption = read_text_if_exists(
	            os.path.join(image_folder, f"{image_name}.txt")
	        )
	        if caption is None:
	            continue  # no caption file for this image

	        tags = read_text_if_exists(
	            os.path.join(tags_folder, f"{image_name}.txt")
	        )
	        if tags is None:
	            prompt = "<ImageHere> Make a caption that describe this image"
	        else:
	            tags_content = tags.strip()
	            prompt = f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"

	        records.append(
	            {
	                # Ids count only the records actually emitted.
	                "id": str(len(records)),
	                "image": [os.path.join(image_folder, filename)],
	                "conversations": [
	                    {"from": "user", "value": prompt},
	                    {"from": "assistant", "value": caption},
	                ],
	            }
	        )
	    return records


	def main():
	    """Convert the hard-coded dataset folders into ``output.json``."""
	    json_data = build_records(folder_path, tags_folder_path)
	    with open(os.path.join(base_folder, "output.json"), "w", encoding="utf-8") as f:
	        json.dump(json_data, f, indent=4)


	if __name__ == "__main__":
	    main()