Spaces:
Running
on
Zero
Running
on
Zero
sindhuhegde
committed on
Commit
•
6272c46
1
Parent(s):
b015d09
Update app
Browse files
app.py
CHANGED
@@ -554,7 +554,7 @@ def load_checkpoint(path, model):
|
|
554 |
model.load_state_dict(new_s)
|
555 |
|
556 |
if use_cuda:
|
557 |
-
model.
|
558 |
|
559 |
print("Loaded checkpoint from: {}".format(path))
|
560 |
|
@@ -1323,28 +1323,9 @@ def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
|
|
1323 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
1324 |
print("Successfully loaded the model")
|
1325 |
|
|
|
1326 |
video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
|
1327 |
|
1328 |
-
# Process in batches
|
1329 |
-
# batch_size = 12
|
1330 |
-
# video_emb = []
|
1331 |
-
# audio_emb = []
|
1332 |
-
|
1333 |
-
# for i in tqdm(range(0, len(video_sequences), batch_size)):
|
1334 |
-
# video_inp = video_sequences[i:i+batch_size, ]
|
1335 |
-
# audio_inp = audio_sequences[i:i+batch_size, ]
|
1336 |
-
|
1337 |
-
# vid_emb = model.forward_vid(video_inp.to(device))
|
1338 |
-
# vid_emb = torch.mean(vid_emb, axis=-1).unsqueeze(-1)
|
1339 |
-
# aud_emb = model.forward_aud(audio_inp.to(device))
|
1340 |
-
|
1341 |
-
# video_emb.append(vid_emb.detach())
|
1342 |
-
# audio_emb.append(aud_emb.detach())
|
1343 |
-
|
1344 |
-
# torch.cuda.empty_cache()
|
1345 |
-
|
1346 |
-
# audio_emb = torch.cat(audio_emb, dim=0)
|
1347 |
-
# video_emb = torch.cat(video_emb, dim=0)
|
1348 |
|
1349 |
# L2 normalize embeddings
|
1350 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
@@ -1429,11 +1410,6 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
|
|
1429 |
return None, status
|
1430 |
|
1431 |
# Pre-process and extract per-speaker tracks in each scene
|
1432 |
-
print("Pre-processing the input video...")
|
1433 |
-
# status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
|
1434 |
-
# if status != 0:
|
1435 |
-
# msg = "Error in pre-processing the input video, please check the input video and try again..."
|
1436 |
-
# return None, msg
|
1437 |
status = preprocess_asd(video_path, result_folder_input)
|
1438 |
if status != "success":
|
1439 |
return None, status
|
|
|
554 |
model.load_state_dict(new_s)
|
555 |
|
556 |
if use_cuda:
|
557 |
+
model.to(device)
|
558 |
|
559 |
print("Loaded checkpoint from: {}".format(path))
|
560 |
|
|
|
1323 |
model = load_checkpoint(CHECKPOINT_PATH, model)
|
1324 |
print("Successfully loaded the model")
|
1325 |
|
1326 |
+
# Extract embeddings
|
1327 |
video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
|
1328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1329 |
|
1330 |
# L2 normalize embeddings
|
1331 |
video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
|
|
|
1410 |
return None, status
|
1411 |
|
1412 |
# Pre-process and extract per-speaker tracks in each scene
|
|
|
|
|
|
|
|
|
|
|
1413 |
status = preprocess_asd(video_path, result_folder_input)
|
1414 |
if status != "success":
|
1415 |
return None, status
|