# python gradio_demo/barc_demo_v3.py
"""Gradio demo for BARC.

The demo first runs a pretrained Faster R-CNN to find a dog bounding box in the
uploaded image, then runs the BARC model on that crop and exports the posed
mesh as a ``.glb`` file that is shown in a 3D viewer and offered for download.
"""

import os

# These must be set before torch is imported so they take effect.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

try:
    # Pin the torch/torchvision versions at start-up (must happen before the
    # torch import below). Alternative that was tried:
    # os.system("pip install --upgrade torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html")
    os.system("pip install --upgrade torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html")
except Exception as e:
    print(e)

import numpy as np
import os
import glob
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
import cv2
from matplotlib import pyplot as plt
from PIL import Image

import gradio as gr

import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src'))
from stacked_hourglass.datasets.imgcropslist import ImgCrops
from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch
from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj

from configs.barc_cfg_defaults import get_cfg_global_updated


print(
    "torch: ", torch.__version__,
    "\ntorchvision: ", torchvision.__version__,
)
# NOTE(review): this dumps the complete process environment to the log, which
# may include tokens or other secrets - consider removing or filtering it.
print("EnV", os.environ)

# Label id of the 'dog' category in the COCO label map used by torchvision's
# pretrained detection models.
COCO_DOG_CLASS_ID = 18


def get_prediction(model, img_path_or_img, confidence=0.5):
    """Run the detector on one image and keep the confident detections.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
      - model - detection model, called as ``model([img_tensor])``
      - img_path_or_img - path of the input image, or an already loaded image
        accepted by ``torchvision.transforms.ToTensor``
      - confidence - threshold value for prediction score
    method:
      - Image is obtained from the image path (if a path was given)
      - the image is converted to image tensor using PyTorch's Transforms
      - image is passed through the model to get the predictions
      - class, box coordinates are obtained, but only prediction score > threshold
        are chosen.
    returns:
      - (pred_boxes, pred_class, pred_score): boxes as
        ``[(x0, y0), (x1, y1)]`` integer corner pairs and class ids, both cut
        after the last detection whose score exceeds ``confidence``;
        ``pred_score`` is the full, untruncated score list.
      - (None, None, None) if no detection exceeds ``confidence``.
    """
    if isinstance(img_path_or_img, str):
        img = Image.open(img_path_or_img).convert('RGB')
    else:
        img = img_path_or_img
    img = T.ToTensor()(img)
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        pred = model([img])
    # pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_class = list(pred[0]['labels'].detach().cpu().numpy())
    pred_boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))]
                  for b in pred[0]['boxes'].detach().cpu().numpy()]
    pred_score = list(pred[0]['scores'].detach().cpu().numpy())

    confident = [idx for idx, score in enumerate(pred_score) if score > confidence]
    if not confident:
        print('no bounding box with a score that is high enough found! -> work on full image')
        return None, None, None
    # Keep everything up to and including the last confident detection.
    n_keep = confident[-1] + 1
    return pred_boxes[:n_keep], pred_class[:n_keep], pred_score


def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
    """Detect the first dog in an image and draw its bounding box.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
      - model - detection model forwarded to get_prediction
      - img_path_or_img - path of the input image, or an image array
      - confidence - threshold value for prediction score
      - rect_th - thickness of bounding box
      - text_size - size of the score text
      - text_th - thickness of the text
    method:
      - prediction is obtained from get_prediction method
      - for the first prediction labelled as dog, the bounding box is drawn
        and its score is written with opencv
    returns:
      - (img, bbox): the image with the box drawn on it and the box as
        ``[(x0, y0), (x1, y1)]``, or ``None`` as bbox if no dog was found.
        When an array is passed in, it is drawn on in place.
    """
    boxes, pred_cls, pred_scores = get_prediction(model, img_path_or_img, confidence)
    if isinstance(img_path_or_img, str):
        img = cv2.imread(img_path_or_img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    else:
        img = img_path_or_img
    bbox = None
    if boxes is not None:
        # pred_scores is not truncated, zip stops at the shorter box list.
        for box, cls, score in zip(boxes, pred_cls, pred_scores):
            if cls != COCO_DOG_CLASS_ID:
                continue
            cv2.rectangle(img, box[0], box[1], color=(0, 255, 0), thickness=rect_th)
            # cv2.putText(img, pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
            cv2.putText(img, str(score), box[0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
            bbox = box
            # Only the first dog detection is used.
            break
    return img, bbox


def run_bbox_inference(input_image):
    """Run Faster R-CNN on the image and return (annotated image, dog bbox).

    The annotated image is also written to
    ``<ROOT_OUT_PATH>/gradio_examples/test2.png``. ``bbox`` is ``None`` when no
    dog was detected.
    """
    # load configs
    cfg = get_cfg_global_updated()
    # NOTE: the detector is rebuilt on every call.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    out_dir = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
    # The directory may not exist yet on the first request.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'test2.png')
    img, bbox = detect_object(model=model, img_path_or_img=input_image, confidence=0.5)
    plt.imsave(out_path, img)
    return img, bbox


def run_barc_inference(input_image, bbox=None):
    """Run BARC on one image and export the posed mesh as a ``.glb`` file.

    parameters:
      - input_image - the image handed to ``ImgCrops``
      - bbox - optional bounding box ``[(x0, y0), (x1, y1)]``; with ``None``
        no bounding box list is passed to ``ImgCrops``
    returns:
      - a two element list holding the path of the exported ``.glb`` file
        twice (once for the 3D viewer, once for the download widget)
    raises:
      - FileNotFoundError if the model checkpoint does not exist
    """
    # load configs
    cfg = get_cfg_global_updated()
    path_model_file_complete = os.path.join(
        cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')

    # Select the hardware device to use for inference.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('----------------------> device: ')
    print(device)

    # Disable gradient calculations.
    torch.set_grad_enabled(False)

    # prepare complete model
    complete_model = ModelImageTo3d_withshape_withproj(
        num_stage_comb=cfg.params.NUM_STAGE_COMB,
        num_stage_heads=cfg.params.NUM_STAGE_HEADS,
        num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE,
        trans_sep=cfg.params.TRANS_SEP,
        arch=cfg.params.ARCH,
        n_joints=cfg.params.N_JOINTS,
        n_classes=cfg.params.N_CLASSES,
        n_keyp=cfg.params.N_KEYP,
        n_bones=cfg.params.N_BONES,
        n_betas=cfg.params.N_BETAS,
        n_betas_limbs=cfg.params.N_BETAS_LIMBS,
        n_breeds=cfg.params.N_BREEDS,
        n_z=cfg.params.N_Z,
        image_size=cfg.params.IMG_SIZE,
        silh_no_tail=cfg.params.SILH_NO_TAIL,
        thr_keyp_sc=cfg.params.KP_THRESHOLD,
        add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
        n_segbps=cfg.params.N_SEGBPS,
        add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT,
        add_partseg=cfg.params.ADD_PARTSEG,
        n_partseg=cfg.params.N_PARTSEG,
        fix_flength=cfg.params.FIX_FLENGTH,
        structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B,
        structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
        nf_version=cfg.params.NF_VERSION)

    # load trained model
    print(path_model_file_complete)
    if not os.path.isfile(path_model_file_complete):
        raise FileNotFoundError(
            'BARC checkpoint not found: {}'.format(path_model_file_complete))
    print('Loading model weights from file: {}'.format(path_model_file_complete))
    checkpoint_complete = torch.load(path_model_file_complete, map_location=device)
    state_dict_complete = checkpoint_complete['state_dict']
    complete_model.load_state_dict(state_dict_complete, strict=False)
    complete_model = complete_model.to(device)

    save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
    os.makedirs(save_imgs_path, exist_ok=True)

    input_image_list = [input_image]
    input_bbox_list = [bbox] if bbox is not None else None
    val_dataset = ImgCrops(image_list=input_image_list, bbox_list=input_bbox_list, dataset_mode='complete')
    test_name_list = val_dataset.test_name_list
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                            num_workers=0, pin_memory=True, drop_last=False)

    # run visual evaluation
    # remark: take ACC_Joints and DATA_INFO from StanExt as this is the training dataset
    all_results = do_visual_epoch(val_loader, complete_model, device,
                                  ImgCrops.DATA_INFO,
                                  weight_dict=None,
                                  acc_joints=ImgCrops.ACC_JOINTS,
                                  save_imgs_path=None,    # save_imgs_path,
                                  metrics='all',
                                  test_name_list=test_name_list,
                                  render_all=cfg.params.RENDER_ALL,
                                  pck_thresh=cfg.params.PCK_THRESH,
                                  return_results=True)

    mesh = all_results[0]['mesh_posed']
    result_path = os.path.join(save_imgs_path, test_name_list[0] + '_z')

    # Homogeneous 4x4 transform: negate x and y, shift z by +1.
    mesh.apply_transform([[-1, 0, 0, 0],
                          [0, -1, 0, 0],
                          [0, 0, 1, 1],
                          [0, 0, 0, 1]])
    result_gltf = result_path + '.glb'
    mesh.export(file_obj=result_gltf)
    return [result_gltf, result_gltf]


def run_complete_inference(input_image):
    """Full pipeline: dog detection followed by BARC inference.

    returns the output of ``run_barc_inference`` (two paths to the ``.glb``).
    raises ``ValueError`` if no image was provided.
    """
    if input_image is None:
        raise ValueError('No input image provided.')
    # The detector draws on its input, so hand it a copy and keep the
    # original image clean for BARC.
    output_interm_image, output_interm_bbox = run_bbox_inference(input_image.copy())
    print(output_interm_bbox)
    # output_image = run_barc_inference(input_image)
    output_image = run_barc_inference(input_image, output_interm_bbox)
    return output_image


# demo = gr.Interface(run_barc_inference, gr.Image(), "image")
# demo = gr.Interface(run_complete_inference, gr.Image(), "image")

# see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py

description = '''
# BARC

#### Project Page
* https://barc.is.tue.mpg.de/

#### Description
This is a demo for BARC. While BARC is trained on image crops, this demo uses a pretrained Faster-RCNN in order to get bounding boxes for the dogs.
To see your result you may have to wait a minute or two, please be patient.

More

#### Citation
```
@inproceedings{BARC:2022,
    title = {BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information,
    author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
    booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
    year = {2022}
}
```
'''

# Example images shipped with the repository (jpg and png crops).
examples = sorted(glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.jpg')) + glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.png')))


demo = gr.Interface(
    fn=run_complete_inference,
    description=description,
    # inputs=gr.Image(type="filepath", label="Input Image"),
    inputs=gr.Image(label="Input Image"),
    outputs=[
        gr.Model3D(
            clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Model"),
        gr.File(label="Download 3D Model")
    ],
    examples=examples,
    thumbnail="barc_thumbnail.png",
    allow_flagging="never",
    cache_examples=True
)

demo.launch(share=True)