sindhuhegde committed on
Commit
4b11292
1 Parent(s): b63403f

Update app

Browse files
app.py CHANGED
@@ -20,6 +20,7 @@ from scipy.io.wavfile import write
20
  import mediapipe as mp
21
  from protobuf_to_dict import protobuf_to_dict
22
  import warnings
 
23
 
24
  mp_holistic = mp.solutions.holistic
25
  warnings.filterwarnings("ignore", category=DeprecationWarning)
@@ -36,7 +37,7 @@ n_negative_samples = 100
36
  # Initialize the mediapipe holistic keypoint detection model
37
  holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)
38
 
39
-
40
  def preprocess_video(path, result_folder, apply_preprocess, padding=20):
41
 
42
  '''
@@ -743,7 +744,7 @@ def extract_audio(video, result_folder):
743
 
744
  return wav_file, "success"
745
 
746
-
747
  def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
748
 
749
  '''
@@ -878,6 +879,7 @@ def save_video(output_tracks, input_frames, wav_file, result_folder):
878
 
879
  return video_output, "success"
880
 
 
881
  def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
882
 
883
  try:
 
20
  import mediapipe as mp
21
  from protobuf_to_dict import protobuf_to_dict
22
  import warnings
23
+ import spaces
24
 
25
  mp_holistic = mp.solutions.holistic
26
  warnings.filterwarnings("ignore", category=DeprecationWarning)
 
37
  # Initialize the mediapipe holistic keypoint detection model
38
  holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)
39
 
40
+ @spaces.GPU(duration=300)
41
  def preprocess_video(path, result_folder, apply_preprocess, padding=20):
42
 
43
  '''
 
744
 
745
  return wav_file, "success"
746
 
747
+ @spaces.GPU(duration=200)
748
  def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):
749
 
750
  '''
 
879
 
880
  return video_output, "success"
881
 
882
+ @spaces.GPU(duration=200)
883
  def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
884
 
885
  try:
preprocess/inference_preprocess.py CHANGED
@@ -18,6 +18,8 @@ from ultralytics import YOLO
18
 
19
  from decord import VideoReader
20
 
 
 
21
  parser = argparse.ArgumentParser(description="FaceTracker")
22
  parser.add_argument('--data_dir', type=str, help='directory to save intermediate temp results')
23
  parser.add_argument('--facedet_scale', type=float, default=0.25, help='Scale factor for face detection')
@@ -162,6 +164,7 @@ def crop_video(opt, track, cropfile, tight_scale=1):
162
 
163
  return {'track': track, 'proc_track': dets}
164
 
 
165
  def inference_video(opt, padding=0):
166
  videofile = os.path.join(opt.avi_dir, 'video.avi')
167
  vidObj = cv2.VideoCapture(videofile)
 
18
 
19
  from decord import VideoReader
20
 
21
+ import spaces
22
+
23
  parser = argparse.ArgumentParser(description="FaceTracker")
24
  parser.add_argument('--data_dir', type=str, help='directory to save intermediate temp results')
25
  parser.add_argument('--facedet_scale', type=float, default=0.25, help='Scale factor for face detection')
 
164
 
165
  return {'track': track, 'proc_track': dets}
166
 
167
+ @spaces.GPU(duration=200)
168
  def inference_video(opt, padding=0):
169
  videofile = os.path.join(opt.avi_dir, 'video.avi')
170
  vidObj = cv2.VideoCapture(videofile)
requirements.txt CHANGED
@@ -10,8 +10,8 @@ protobuf3-to-dict==0.1.5
10
  python_speech_features==0.6
11
  scenedetect==0.6.4
12
  scikit-learn==1.5.1
13
- torch==1.11.0
14
- torchvision==0.12.0
15
  tqdm==4.66.4
16
  ultralytics==8.2.70
17
  ultralytics-thop==2.0.0
 
10
  python_speech_features==0.6
11
  scenedetect==0.6.4
12
  scikit-learn==1.5.1
13
+ torch==2.0.0
14
+ torchvision==0.15.1
15
  tqdm==4.66.4
16
  ultralytics==8.2.70
17
  ultralytics-thop==2.0.0