sindhuhegde committed on
Commit
afa2bc0
1 Parent(s): 342ecda

Update app

Files changed (1)
  1. app.py +2 -52
app.py CHANGED
@@ -462,57 +462,7 @@ def preprocess_video(path, result_folder, apply_preprocess, padding=20):
 
     person_videos, person_tracks, msg = get_person_detection(all_frames, frame_count, padding)
     if msg != "success":
-        return None, None, None, msg
-
-    # # Load YOLOv9 model (pre-trained on COCO dataset)
-    # yolo_model = YOLO("yolov9s.pt")
-    # print("Loaded the YOLO model")
-
-
-
-    # person_videos = {}
-    # person_tracks = {}
-
-    # print("Processing the frames...")
-    # for frame_idx in tqdm(range(frame_count)):
-
-    #     frame = all_frames[frame_idx]
-
-    #     # Perform person detection
-    #     results = yolo_model(frame, verbose=False)
-    #     detections = results[0].boxes
-
-    #     for i, det in enumerate(detections):
-    #         x1, y1, x2, y2 = det.xyxy[0]
-    #         cls = det.cls[0]
-    #         if int(cls) == 0: # Class 0 is 'person' in COCO dataset
-
-    #             x1 = max(0, int(x1) - padding)
-    #             y1 = max(0, int(y1) - padding)
-    #             x2 = min(frame.shape[1], int(x2) + padding)
-    #             y2 = min(frame.shape[0], int(y2) + padding)
-
-    #             if i not in person_videos:
-    #                 person_videos[i] = []
-    #                 person_tracks[i] = []
-
-    #             person_videos[i].append(frame)
-    #             person_tracks[i].append([x1,y1,x2,y2])
-
-
-    # num_persons = 0
-    # for i in person_videos.keys():
-    #     if len(person_videos[i]) >= frame_count//2:
-    #         num_persons+=1
-
-    # if num_persons==0:
-    #     msg = "No person detected in the video! Please give a video with one person as input"
-    #     return None, None, None, msg
-    # if num_persons>1:
-    #     msg = "More than one person detected in the video! Please give a video with only one person as input"
-    #     return None, None, None, msg
-
-
+        return None, None, None, msg
 
     # For the person detected, crop the frame based on the bounding box
     if len(person_videos[0]) > frame_count-10:
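For context, the deleted lines are an old commented-out version of the person-detection step, which now lives behind the single get_person_detection call kept above. The helper's body is not part of this diff; the following is a minimal sketch of what it plausibly looks like, reconstructed from the deleted block (the ultralytics import and the exact return convention are assumptions, not confirmed by this commit):

from ultralytics import YOLO
from tqdm import tqdm

def get_person_detection(all_frames, frame_count, padding=20):
    # Hypothetical reconstruction: wraps the same YOLOv9 person-detection
    # loop as the commented-out block removed in this commit.
    yolo_model = YOLO("yolov9s.pt")  # pre-trained on COCO

    person_videos, person_tracks = {}, {}
    for frame_idx in tqdm(range(frame_count)):
        frame = all_frames[frame_idx]
        detections = yolo_model(frame, verbose=False)[0].boxes
        for i, det in enumerate(detections):
            if int(det.cls[0]) != 0:  # class 0 is 'person' in COCO
                continue
            x1, y1, x2, y2 = det.xyxy[0]
            # Pad the box and clamp it to the frame boundaries
            x1 = max(0, int(x1) - padding)
            y1 = max(0, int(y1) - padding)
            x2 = min(frame.shape[1], int(x2) + padding)
            y2 = min(frame.shape[0], int(y2) + padding)
            person_videos.setdefault(i, []).append(frame)
            person_tracks.setdefault(i, []).append([x1, y1, x2, y2])

    # A track counts as a person only if it appears in at least half the frames
    num_persons = sum(len(v) >= frame_count // 2 for v in person_videos.values())
    if num_persons == 0:
        return None, None, "No person detected in the video! Please give a video with one person as input"
    if num_persons > 1:
        return None, None, "More than one person detected in the video! Please give a video with only one person as input"
    return person_videos, person_tracks, "success"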
@@ -1144,7 +1094,7 @@ def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
     video_emb = []
     audio_emb = []
 
-    model = model.cuda()
+    model = model.to(device)
 
     for i in tqdm(range(0, len(video_sequences), batch_size)):
        video_inp = video_sequences[i:i+batch_size, ]
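The second hunk drops the hard-coded model.cuda() in favour of model.to(device), so the app also runs on CPU-only machines. The commit shows only the use of device, not its definition; a minimal, self-contained sketch of the usual PyTorch pattern, assuming app.py defines device at module level:

import torch

# "device" is presumably defined once near the top of app.py;
# this commit shows only its use, not its definition.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = torch.nn.Linear(512, 512)  # stand-in for the real embedding model
model = model.to(device)           # works on CPU hosts, unlike .cuda()

# Inputs must be moved to the same device before the forward pass:
x = torch.randn(4, 512).to(device)
y = model(x)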
 