File size: 3,060 Bytes

6d504a5

import csv
import os
import glob
import shutil
import re


# Rename files from the original filename (hexadecimal salad) to the
# Session_ID (human readable) and back.
#
# Default location of the CSV that maps File_Name <-> Session_ID. It is a
# relative path, so it is resolved against the current working directory
# when the file is opened.
DEFAULT_MAP_PATH = '../../SessionIDs_from_catalog.csv'

def make_SessionID_map(path=None):
    """Build File_Name <-> Session_ID lookup dictionaries from a CSV file.

    The CSV must start with a header row whose first column is 'File_Name'
    or 'Conference_ID' and whose second column is 'Session_ID' (copied from
    columns 1 & 2 of the Catalog on OneDrive). Any further columns are
    ignored.

    Args:
        path: Path of the CSV file. When None (the default) the module-level
            DEFAULT_MAP_PATH is used, looked up at call time.

    Returns:
        A tuple ``(SID_to_FN, FN_to_SID)`` of dictionaries mapping
        Session_ID -> file name and file name -> Session_ID. File names are
        cut at their first '.', which removes any extension(s). Rows with
        fewer than two columns, or with a blank file name or Session_ID,
        are skipped.

    Raises:
        AssertionError: If the file is empty or the header row does not
            match the expected column names.
    """
    if path is None:
        path = DEFAULT_MAP_PATH

    SID_to_FN = {}
    FN_to_SID = {}
    # 'utf-8-sig' transparently drops the BOM that spreadsheet exports often
    # prepend; newline='' is what the csv module asks for.
    with open(path, encoding='utf-8-sig', newline='') as f:
        reader = csv.reader(f)
        headers = next(reader, None)  # None when the file has no rows at all
        headers_ok = (
            headers is not None
            and len(headers) >= 2
            and headers[0] in ('File_Name', 'Conference_ID')
            and headers[1] == 'Session_ID'
        )
        if not headers_ok:
            # Raised explicitly (rather than via `assert`) so the check is
            # still performed when Python runs with -O.
            raise AssertionError(
                "Headers are wrong, expected ('File_Name' or 'Conference_ID') and 'Session_ID'"
            )

        for row in reader:
            if len(row) < 2:
                # Blank line or truncated row: nothing to map.
                continue
            filename, sessionID = row[0], row[1]
            filename = filename.split('.')[0]  # remove extensions
            if filename.strip() and sessionID.strip():
                SID_to_FN[sessionID] = filename
                FN_to_SID[filename] = sessionID
    return SID_to_FN, FN_to_SID


def rename_files_SID_to_FN(path, recursive=True, overwrite=False):
    """Rename files named after a Session_ID back to their original file name.

    For every Session_ID in the default catalog map, files whose base name
    ends with that ID immediately before a '.' are located under ``path``
    and a counterpart is created in which the ID is replaced by the
    original file name. Only the file's base name is rewritten; directory
    components of the path are left untouched.

    Args:
        path: Directory to search.
        recursive: When True, search ``path`` and all of its subdirectories;
            when False, search only ``path`` itself.
        overwrite: When true, the source file is moved (renamed in place);
            otherwise it is copied and the Session_ID-named file is kept.

    Returns:
        List of the newly created paths, in the order they were processed.
    """
    SID_to_FN, _ = make_SessionID_map()
    newpaths = []
    # Paths already consumed or produced, so a file is never processed twice.
    claimed = set()

    # Handle the longest IDs first: when one Session_ID is a suffix of
    # another (e.g. 'S1' and 'XS1'), a file belongs to the longest match.
    for sID in sorted(SID_to_FN, key=len, reverse=True):
        filename = SID_to_FN[sID]
        # Escape the ID so characters such as '[' or '*' are matched literally.
        name_pattern = f'*{glob.escape(sID)}.*'
        if recursive:
            pattern = os.path.join(path, '**', name_pattern)
        else:
            # Without recursive=True, '**' would act like '*' and only look
            # one directory level down, never in `path` itself.
            pattern = os.path.join(path, name_pattern)

        for srcpath in glob.glob(pattern, recursive=recursive):
            if srcpath in claimed:
                continue
            basename = os.path.basename(srcpath)
            parent = srcpath[:len(srcpath) - len(basename)]
            newpath = parent + basename.replace(sID, filename)
            claimed.update((srcpath, newpath))
            print(newpath)
            if overwrite:
                shutil.move(srcpath, newpath)
            else:
                shutil.copy(srcpath, newpath)
            newpaths.append(newpath)
    return newpaths


    # # get sessnames
    # sesslist = [s for s in os.listdir(path) ]
    # srclist = [os.path.join(src_dir, filename) for filename in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, filename))]
    # for src in srclist:
    #     sessname_matches = [sessname in src for sessname in sesslist]
    #     if sum(sessname_matches)>1:
    #         print('!!!!    multiple matches, will take longest match. TODO: implement this you dope')
    #     elif not any(sessname_matches):
    #         print(f'!!!!    no sessname matches for file {src}')
    #     else: 
    #         sessname = sesslist[sessname_matches.index(True)]
    #         print(f'...copying to {sessname}')
    #         shutil.copy(src, os.path.join(dest_dir,sessname))

def rename_files_FN_to_SID(path, recursive=True):
    """Placeholder for the File_Name -> Session_ID rename (not implemented).

    At present this only loads the File_Name -> Session_ID mapping from the
    default catalog CSV. ``path`` and ``recursive`` are accepted but unused,
    no files are touched, and None is returned.
    """
    # TODO: implement the actual renaming using FN_to_SID.
    FN_to_SID = make_SessionID_map()[1]

def extract_conferenceID_from_filename(filename):
    """Extract the conference ID from a file name.

    Only the part of ``filename`` before the first space is considered.
    From that token the following pieces are removed, in this order:

    1. a trailing ``[_]<letters>[.<letters>].xlsx`` suffix,
    2. the words ``TMcoded`` and ``Transcript``,
    3. a ``_start<digits>_end<digits>[_]`` segment,
    4. a ``<5 digits>_<YYYY>-<MM>-<DD>_`` prefix-style segment.

    Example: ``'12345_2020-03-04_confABC_start10_end20_Transcript.xlsx'``
    yields ``'confABC'``.

    Args:
        filename: File name (not a full path) to parse.

    Returns:
        The remaining string after all removals.
    """
    conferenceID = filename.split(' ')[0]
    # Raw strings keep '\.' and '\d' as regex escapes instead of (invalid)
    # Python string escapes. The dot before 'xlsx' is escaped so that only a
    # real '.xlsx' extension is stripped.
    conferenceID = re.sub(r'_?[a-zA-Z]*(\.*[a-zA-Z]*)\.xlsx', '', conferenceID)
    conferenceID = re.sub(r'TMcoded|Transcript', '', conferenceID)
    conferenceID = re.sub(r'_start\d+_end\d+_?', '', conferenceID)
    conferenceID = re.sub(r'\d{5}_\d{4}-\d{2}-\d{2}_', '', conferenceID)
    return conferenceID