Spaces:
Build error
Build error
import os | |
import unittest | |
from unittest.mock import patch | |
from io import BytesIO | |
from utils.utils import download_and_extract | |
from utils.utils import get_checksum | |
from utils.utils import merge_file_lists | |
from utils.utils import reindex_file_list_keys | |
from utils.utils import remove_ids_from_file_list | |
from utils.utils import deduplicate_splits | |
class TestMergeFileListFunctions(unittest.TestCase): | |
def test_merge_file_lists(self): | |
# Define some example input dictionaries | |
file_list_1 = {1: 'file1.txt', 2: 'file2.txt'} | |
file_list_2 = {3: 'file3.txt', 4: 'file4.txt'} | |
file_list_3 = {5: 'file5.txt', 6: 'file6.txt'} | |
# Call the merge_file_lists function with the example input | |
merged_file_list = merge_file_lists([file_list_1, file_list_2, file_list_3]) | |
# Check that the merged dictionary has the correct length and keys/values | |
self.assertEqual(len(merged_file_list), 6) | |
self.assertEqual(merged_file_list[0], 'file1.txt') | |
self.assertEqual(merged_file_list[1], 'file2.txt') | |
self.assertEqual(merged_file_list[2], 'file3.txt') | |
self.assertEqual(merged_file_list[3], 'file4.txt') | |
self.assertEqual(merged_file_list[4], 'file5.txt') | |
self.assertEqual(merged_file_list[5], 'file6.txt') | |
def test_reindex_file_list_keys(self): | |
file_list = {'a': {'id': 1, 'name': 'file1'}, 'b': {'id': 2, 'name': 'file2'}} | |
expected_reindexed = {0: {'id': 1, 'name': 'file1'}, 1: {'id': 2, 'name': 'file2'}} | |
reindexed = reindex_file_list_keys(file_list) | |
self.assertEqual(reindexed, expected_reindexed) | |
def test_remove_ids_from_file_list(self): | |
file_list = { | |
'a': { | |
'music_id': 123, | |
'name': 'file1' | |
}, | |
'b': { | |
'music_id': 222, | |
'name': 'file2' | |
} | |
} | |
selected_ids = [123] | |
expected_filtered = {0: {'music_id': 222, 'name': 'file2'}} | |
filtered = remove_ids_from_file_list(file_list, selected_ids, reindex=True) | |
self.assertEqual(filtered, expected_filtered) | |
class TestGetChecksum(unittest.TestCase): | |
def test_get_checksum_z(self): | |
# Create a temporary file with some content | |
file_name = "temp_test_file.txt" | |
with open(file_name, "w") as f: | |
f.write("This is a test file") | |
# Calculate the expected checksum using an online md5 calculator or a known md5 value | |
expected_checksum = "0b26e313ed4a7ca6904b0e9369e5b957" | |
# Call the get_checksum function | |
calculated_checksum = get_checksum(file_name) | |
# Compare the expected and calculated checksums | |
self.assertEqual(expected_checksum, calculated_checksum) | |
# Clean up the temporary file | |
os.remove(file_name) | |
class TestDeduplicateSplits(unittest.TestCase): | |
def test_deduplicate_splits(self): | |
# Create sample file lists for splits A and B | |
file_list_a = { | |
'split1': { | |
'some_id': 1, | |
'file_name': 'a.jpg' | |
}, | |
'split2': { | |
'some_id': 2, | |
'file_name': 'b.jpg' | |
}, | |
'split3': { | |
'some_id': 3, | |
'file_name': 'c.jpg' | |
} | |
} | |
file_list_b = { | |
'split4': { | |
'some_id': 2, | |
'file_name': 'd.jpg' | |
}, | |
'split5': { | |
'some_id': 3, | |
'file_name': 'e.jpg' | |
}, | |
'split6': { | |
'some_id': 6, | |
'file_name': 'f.jpg' | |
} | |
} | |
# Remove duplicates between split A and split B | |
filtered_file_list_a = deduplicate_splits(file_list_a, file_list_b, reindex=False) | |
# Check that the correct IDs have been removed from split A | |
expected_file_list_a = { | |
'split1': { | |
'some_id': 1, | |
'file_name': 'a.jpg' | |
}, | |
} | |
self.assertDictEqual(filtered_file_list_a, expected_file_list_a) | |
if __name__ == '__main__': | |
unittest.main() |