File size: 3,060 Bytes
6d504a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import csv
import os
import glob
import shutil
import re
# rename files from original filename (hexadecimal salad) to Session_ID (human readable) and back
# Catalog export used when no explicit map path is given.
DEFAULT_MAP_PATH = '../../SessionIDs_from_catalog.csv'


def make_SessionID_map(path=DEFAULT_MAP_PATH):
    """Build lookup tables between file names and Session_IDs from a CSV file.

    The CSV must start with a header row whose first column is 'File_Name'
    or 'Conference_ID' and whose second column is 'Session_ID' - copied from
    columns 1 & 2 of the Catalog on OneDrive. Any further columns are ignored.

    Args:
        path: CSV file to read. Defaults to DEFAULT_MAP_PATH.

    Returns:
        A tuple ``(SID_to_FN, FN_to_SID)`` of dicts: Session_ID -> file name
        and file name -> Session_ID. File names are cut at their first '.',
        which removes any extension. Rows with fewer than two columns, or
        where either value is blank, are skipped.

    Raises:
        AssertionError: if the header row is missing or does not have the
            expected first two columns.
    """
    SID_to_FN = {}
    FN_to_SID = {}
    # 'utf-8-sig' drops a leading BOM if present; newline='' lets the csv
    # module do its own newline handling, as its documentation recommends.
    with open(path, encoding='utf-8-sig', newline='') as f:
        reader = csv.reader(f)
        headers = next(reader, [])  # an empty file fails the header check below
        header_ok = (
            len(headers) >= 2
            and headers[0] in ('File_Name', 'Conference_ID')
            and headers[1] == 'Session_ID'
        )
        if not header_ok:
            # Raised explicitly (rather than via `assert`) so the check still
            # runs under `python -O`; the exception type is kept for callers.
            raise AssertionError(
                "Headers are wrong, expected ('File_Name' or 'Conference_ID') and 'Session_ID'"
            )
        for row in reader:
            # csv.reader yields [] for blank lines; skip those and any other
            # row that lacks the two columns we need.
            if len(row) < 2:
                continue
            filename, sessionID = row[:2]
            filename = filename.split('.')[0]  # remove extensions
            if filename.strip() and sessionID.strip():
                SID_to_FN[sessionID] = filename
                FN_to_SID[filename] = sessionID
    return SID_to_FN, FN_to_SID
def rename_files_SID_to_FN(path, recursive=True, overwrite=False):
    """Give files named by Session_ID a name based on the mapped file name.

    Loads the Session_ID -> file name map from the default catalog CSV, then
    for every Session_ID looks for files whose name matches
    ``*<Session_ID>.*`` and creates a counterpart in the same directory with
    the Session_ID in the file name replaced by the mapped file name.

    Args:
        path: Directory to search.
        recursive: If true, search ``path`` and all of its subdirectories;
            if false, search only ``path`` itself.
        overwrite: If true, the matching file is moved to its new name;
            otherwise it is copied and the source file is left in place.

    Returns:
        List of the new paths, in the order they were written.
    """
    SID_to_FN, _ = make_SessionID_map()
    #TODO: deal with matching nested sIDs, see commented code below
    newpaths = []
    for sID, filename in SID_to_FN.items():
        # Escape the ID so characters such as '[' or '*' are matched literally.
        name_pattern = f'*{glob.escape(sID)}.*'
        if recursive:
            pattern = os.path.join(path, '**', name_pattern)
        else:
            # Without recursive=True a '**' segment behaves like '*' and would
            # match only one directory level below `path`, never `path` itself.
            pattern = os.path.join(path, name_pattern)
        for srcpath in glob.glob(pattern, recursive=recursive):
            # Rewrite only the file name; a directory that happens to contain
            # the Session_ID must stay as it is or the target would not exist.
            folder, basename = os.path.split(srcpath)
            newpath = os.path.join(folder, basename.replace(sID, filename))
            print(newpath)
            if overwrite:
                shutil.move(srcpath, newpath)
            else:
                shutil.copy(srcpath, newpath)
            newpaths.append(newpath)
    return newpaths
# # get sessnames
# sesslist = [s for s in os.listdir(path) ]
# srclist = [os.path.join(src_dir, filename) for filename in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, filename))]
# for src in srclist:
# sessname_matches = [sessname in src for sessname in sesslist]
# if sum(sessname_matches)>1:
# print('!!!! multiple matches, will take longest match. TODO: implement this you dope')
# elif not any(sessname_matches):
# print(f'!!!! no sessname matches for file {src}')
# else:
# sessname = sesslist[sessname_matches.index(True)]
# print(f'...copying to {sessname}')
# shutil.copy(src, os.path.join(dest_dir,sessname))
def rename_files_FN_to_SID(path, recursive=True):
    """Placeholder for renaming files from original file name to Session_ID.

    At present this only loads the file name -> Session_ID map from the
    default catalog CSV; ``path`` and ``recursive`` are accepted but not
    used, no file is touched, and nothing is returned.
    """
    # Index 1 of the returned pair is the file name -> Session_ID dict.
    FN_to_SID = make_SessionID_map()[1]
def extract_conferenceID_from_filename(filename):
    """Extract the conferenceID from a file name.

    Only the part of ``filename`` before the first space is considered. The
    following pieces are then removed, in this order, wherever they occur
    (none of the patterns is anchored):

    1. an optional '_', letters and dots, one arbitrary character and 'xlsx'
       (e.g. a trailing '_TMcoded.xlsx' or '.xlsx');
    2. the markers 'TMcoded' and 'Transcript';
    3. a '_start<digits>_end<digits>' segment with an optional trailing '_';
    4. a '<5 digits>_<4 digits>-<2 digits>-<2 digits>_' segment
       (date-shaped, e.g. '12345_2021-03-04_').

    Args:
        filename: File name to parse.

    Returns:
        What remains of the first space-separated token after the removals.
    """
    conferenceID = filename.split(' ')[0]
    # Raw strings keep '\.' and '\d' as regex escapes without relying on
    # Python passing unknown string escapes through (a SyntaxWarning in 3.12+).
    conferenceID = re.sub(r'_?[a-zA-Z]*(\.*[a-zA-Z]*).xlsx', '', conferenceID)
    conferenceID = re.sub(r'TMcoded|Transcript', '', conferenceID)
    conferenceID = re.sub(r'_start\d+_end\d+_?', '', conferenceID)
    conferenceID = re.sub(r'\d{5}_\d{4}-\d{2}-\d{2}_', '', conferenceID)
    return conferenceID