Spaces:
Running
Running
class Subtitle:
    """Render timestamped transcript segments as SRT, WebVTT, plain text or LRC.

    A segment is a mapping with a ``'text'`` string and a ``'timestamp'``
    pair ``(start, end)`` expressed in seconds. When ``end`` is falsy
    (for example ``None``) the start time is reused as the end time.
    """

    def __init__(self, ext="srt"):
        """Select the output format.

        Args:
            ext: One of ``"srt"``, ``"vtt"``, ``"txt"`` or ``"lrc"``.

        Raises:
            KeyError: If ``ext`` is not one of the supported formats.
        """
        sub_dict = {
            "srt": {
                "coma": ",",
                "header": "",
                "format": self._srt_format,
            },
            "vtt": {
                "coma": ".",
                # The WebVTT file signature is the case-sensitive string
                # "WEBVTT"; parsers reject any other capitalisation.
                "header": "WEBVTT\n\n",
                "format": self._vtt_format,
            },
            "txt": {
                "coma": "",
                "header": "",
                "format": self._txt_format,
            },
            "lrc": {
                "coma": "",
                "header": "",
                "format": self._lrc_format,
            },
        }
        spec = sub_dict[ext]
        self.ext = ext
        self.coma = spec["coma"]  # separator between seconds and milliseconds
        self.header = spec["header"]
        self.format_fn = spec["format"]

    def timeformat(self, time):
        """Return ``time`` (seconds) as ``HH:MM:SS<sep>mmm``.

        ``<sep>`` is the format-specific separator stored in ``self.coma``.
        """
        # Convert to whole milliseconds first. Truncating the float
        # fractional part instead loses a millisecond on inputs such as
        # 1.001, whose fraction is stored as 0.000999...
        total_ms = int(round(time * 1000))
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        seconds, milliseconds = divmod(rest, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}{self.coma}{milliseconds:03d}"

    def seconds_to_lrc_timestamp(self, time):
        """Return ``time`` (seconds) as an LRC tag ``[MM:SS.mmm]``."""
        # Round once on integer milliseconds so that a value such as
        # 59.9996 carries into the minutes instead of printing "60.000".
        total_ms = int(round(time * 1000))
        minutes, rest = divmod(total_ms, 60_000)
        secs, millis = divmod(rest, 1000)
        return f"[{minutes:02}:{secs:02}.{millis:03}]"

    def _cue_times(self, segment):
        """Return the formatted ``(start, end)`` pair for an SRT/VTT cue."""
        start, end = segment['timestamp'][0], segment['timestamp'][1]
        # A falsy end time falls back to the start time.
        return self.timeformat(start), self.timeformat(end or start)

    def _srt_format(self, i, segment):
        """Format one SRT cue; ``i`` is the zero-based segment index."""
        start_time, end_time = self._cue_times(segment)
        return f"{i + 1}\n{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _vtt_format(self, i, segment):
        """Format one WebVTT cue (``i`` is unused)."""
        start_time, end_time = self._cue_times(segment)
        return f"{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _txt_format(self, i, segment):
        """Format one plain-text line (``i`` is unused)."""
        return f"{segment['text']}\n"

    def _lrc_format(self, i, segment):
        """Format one LRC line using only the segment start time."""
        start_time = self.seconds_to_lrc_timestamp(segment['timestamp'][0])
        return f"{start_time}{segment['text']}\n"

    def get_subtitle(self, segments):
        """Return the full subtitle document for ``segments`` as a string.

        Leading whitespace is stripped from each segment's ``'text'`` in
        place. A segment whose formatting raises is reported with ``print``
        and skipped, so one bad segment does not abort the whole document.
        """
        parts = [self.header]
        for i, segment in enumerate(segments):
            segment['text'] = segment['text'].lstrip()
            try:
                parts.append(self.format_fn(i, segment))
            except Exception as e:
                print(e, segment)
        return "".join(parts)

    def write_subtitle(self, segments, output_file):
        """Write the rendered subtitles to ``<output_file>.<ext>`` as UTF-8."""
        output_file_with_ext = f"{output_file}.{self.ext}"
        subtitle = self.get_subtitle(segments)
        with open(output_file_with_ext, 'w', encoding='utf-8') as f:
            f.write(subtitle)
def write_file(output_file,subtitle):
    """Write ``subtitle`` to the path ``output_file`` as UTF-8 text.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    with open(output_file, mode='w', encoding='utf-8') as sink:
        sink.write(subtitle)
def subtitle_output(inputs, chunks):
    """Render ``chunks`` in every supported format and write one file each.

    Files are written to the current working directory and named after the
    input file's base name with its final extension removed.

    Args:
        inputs: Path of the source media file; only its base name is used.
        chunks: Segments accepted by ``Subtitle.get_subtitle``.

    Returns:
        A tuple ``(lrc_text, files_out)`` where ``files_out`` lists the
        written file names in the order lrc, srt, vtt, txt.
    """
    import os

    # Strip only the final extension: splitting on the first "." would turn
    # "talk.v2.mp3" into "talk" and make distinct inputs collide.
    file_name = os.path.splitext(os.path.basename(inputs))[0]

    formats = ("lrc", "srt", "vtt", "txt")
    # Render everything before touching the disk, so a rendering failure
    # does not leave a partial set of files behind.
    rendered = {ext: Subtitle(ext).get_subtitle(chunks) for ext in formats}

    files_out = []
    for ext in formats:
        out_name = f"{file_name}.{ext}"
        write_file(out_name, rendered[ext])
        files_out.append(out_name)
    return rendered["lrc"], files_out
def text_output(inputs, text):
    """Write ``text`` to ``<input base name>.txt`` in the working directory.

    Args:
        inputs: Path of the source media file; only its base name is used.
        text: The transcript text to write.

    Returns:
        A tuple ``(text, files_out)`` where ``files_out`` is a one-element
        list holding the written file name.
    """
    import os

    # Strip only the final extension: splitting on the first "." would turn
    # "talk.v2.mp3" into "talk" and make distinct inputs collide.
    file_name = os.path.splitext(os.path.basename(inputs))[0]
    out_name = file_name + ".txt"
    write_file(out_name, text)
    return text, [out_name]