Spaces:
Running
on
Zero
Running
on
Zero
sindhuhegde
committed on
Commit
•
6272c46
1
Parent(s):
b015d09
Update app
Browse files
app.py
CHANGED
@@ -554,7 +554,7 @@ def load_checkpoint(path, model):
|
|
554 |
model.load_state_dict(new_s)
|
555 |
|
556 |
if use_cuda:
|
557 |
-
model.
|
558 |
|
559 |
print("Loaded checkpoint from: {}".format(path))
|
560 |
|
@@ -1323,28 +1323,9 @@ def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
|
1323 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
1324 |
print("Successfully loaded the model")
|
1325 |
|
|
|
1326 |
video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
|
1327 |
|
1328 |
-
# Process in batches
|
1329 |
-
# batch_size = 12
|
1330 |
-
# video_emb = []
|
1331 |
-
# audio_emb = []
|
1332 |
-
|
1333 |
-
# for i in tqdm(range(0, len(video_sequences), batch_size)):
|
1334 |
-
# video_inp = video_sequences[i:i+batch_size, ]
|
1335 |
-
# audio_inp = audio_sequences[i:i+batch_size, ]
|
1336 |
-
|
1337 |
-
# vid_emb = model.forward_vid(video_inp.to(device))
|
1338 |
-
# vid_emb = torch.mean(vid_emb, axis=-1).unsqueeze(-1)
|
1339 |
-
# aud_emb = model.forward_aud(audio_inp.to(device))
|
1340 |
-
|
1341 |
-
# video_emb.append(vid_emb.detach())
|
1342 |
-
# audio_emb.append(aud_emb.detach())
|
1343 |
-
|
1344 |
-
# torch.cuda.empty_cache()
|
1345 |
-
|
1346 |
-
# audio_emb = torch.cat(audio_emb, dim=0)
|
1347 |
-
# video_emb = torch.cat(video_emb, dim=0)
|
1348 |
|
1349 |
# L2 normalize embeddings
|
1350 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
@@ -1429,11 +1410,6 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
|
|
1429 |
return None, status
|
1430 |
|
1431 |
# Pre-process and extract per-speaker tracks in each scene
|
1432 |
-
print("Pre-processing the input video...")
|
1433 |
-
# status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
|
1434 |
-
# if status != 0:
|
1435 |
-
# msg = "Error in pre-processing the input video, please check the input video and try again..."
|
1436 |
-
# return None, msg
|
1437 |
status = preprocess_asd(video_path, result_folder_input)
|
1438 |
if status != "success":
|
1439 |
return None, status
|
|
|
554 |
model.load_state_dict(new_s)
|
555 |
|
556 |
if use_cuda:
|
557 |
+
model.to(device)
|
558 |
|
559 |
print("Loaded checkpoint from: {}".format(path))
|
560 |
|
|
|
1323 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
1324 |
print("Successfully loaded the model")
|
1325 |
|
1326 |
+
# Extract embeddings
|
1327 |
video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
|
1328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1329 |
|
1330 |
# L2 normalize embeddings
|
1331 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
|
|
1410 |
return None, status
|
1411 |
|
1412 |
# Pre-process and extract per-speaker tracks in each scene
|
|
|
|
|
|
|
|
|
|
|
1413 |
status = preprocess_asd(video_path, result_folder_input)
|
1414 |
if status != "success":
|
1415 |
return None, status
|