init
- .gitattributes +1 -0
- app.py +48 -0
- det_dog-cycle-car.png +0 -0
- dog-cycle-car.png +0 -0
- requirements.txt +8 -0
- yolo/Poster3.jpg +0 -0
- yolo/README.md +12 -0
- yolo/__pycache__/darknet.cpython-37.pyc +0 -0
- yolo/__pycache__/model.cpython-37.pyc +0 -0
- yolo/__pycache__/utils.cpython-37.pyc +0 -0
- yolo/cfg/yolov3.cfg +788 -0
- yolo/darknet.py +586 -0
- yolo/data/coco.names +80 -0
- yolo/det/det_Poster3.jpg +0 -0
- yolo/det/det_dog-cycle-car.png +0 -0
- yolo/det/det_sample.jpeg +0 -0
- yolo/det/det_victoria.jpg +0 -0
- yolo/detector.py +321 -0
- yolo/dog-cycle-car.png +0 -0
- yolo/model.py +189 -0
- yolo/pallete +0 -0
- yolo/sample.jpeg +0 -0
- yolo/sample.py +77 -0
- yolo/test.py +166 -0
- yolo/utils.py +324 -0
- yolo/victoria.jpg +0 -0
- yolo/yolov3-tiny.cfg +182 -0
- yolo/yolov3-tiny.weights +3 -0
- yolo/yolov3.cfg +788 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.weights filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,48 @@
import numpy as np
import gradio as gr
from PIL import Image
from yolo.model import *   # provides yolo_model
# from model import api    # unused; there is no top-level model.py in this repo

model = yolo_model()

def predict(input_img):
    # Gradio hands the input over as a numpy array; wrap it for the model.
    input_img = Image.fromarray(input_img)
    _, payload = model.predict(input_img)
    # print('prediction', prediction)
    return payload

css = ''

# with gr.Blocks(css=css) as demo:
#     gr.HTML("<h1><center>Signsapp: Classify the signs based on the hands sign images<center><h1>")
#     gr.Interface(sign, inputs=gr.Image(shape=(200, 200)), outputs=gr.Label())

title = r"yolov3"

description = r"""
<center>
Recognize common objects using the model
<img src="file/det_dog-cycle-car.png" width=350px>
</center>
"""
article = r"""
### Credits
- [Coursera](https://www.coursera.org/learn/convolutional-neural-networks/)
"""

demo = gr.Interface(
    title=title,
    description=description,
    article=article,
    fn=predict,
    inputs=gr.Image(shape=(200, 200)),
    outputs=gr.Image(shape=(200, 200)),
    examples=["dog-cycle-car.png"],
    # allow_flagging="manual",
    # flagging_options=['recule', 'tournedroite', 'arretetoi', 'tournegauche', 'gauche', 'avance', 'droite'],
    # flagging_dir="./flag/men"
)

# demo.queue()
demo.launch(debug=True)
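yolo/model.py (which defines yolo_model) is added further down in this commit, but its body is not reproduced in this view; the only contract app.py relies on is that predict(...) returns a pair whose second element is an image Gradio can display. A hypothetical local smoke test under that assumption:

# Hypothetical check of the callback above, outside Gradio. Assumes predict() returns an
# image-like array as its payload; yolo_model's actual return type lives in yolo/model.py.
import numpy as np
from PIL import Image

arr = np.array(Image.open("dog-cycle-car.png").convert("RGB"))
out = predict(arr)
print(type(out), getattr(out, "shape", None))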
det_dog-cycle-car.png
ADDED
dog-cycle-car.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,8 @@
Flask-Cors
Flask
Werkzeug
pillow
numpy
boto3
torch==1.7.1      # PyPI package name for PyTorch is "torch", not "pytorch"
opencv-python     # "opencv" is not a PyPI package; the Python bindings ship as opencv-python
yolo/Poster3.jpg
ADDED
yolo/README.md
ADDED
@@ -0,0 +1,12 @@
## Yolo

[Part 1 : Understanding How YOLO works](https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/)

[Part 2 : Creating the layers of the network architecture](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-2/)

[Part 3 : How to implement a YOLO (v3) object detector from scratch in PyTorch](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-3/)

[Part 4 : Objectness Confidence Thresholding and Non-maximum Suppression](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-4/)

[Part 5 : Designing the input and the output pipelines](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-5/)
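The five parts above map directly onto the files added below: darknet.py builds the network from the cfg, yolo/utils.py holds predict_transform and write_results, and detector.py is the batch CLI. A rough end-to-end sketch of that flow outside the Gradio app, run from the repo root and assuming a full yolov3.weights has been downloaded separately (only yolov3-tiny.weights is committed via LFS):

import numpy as np
import torch
from PIL import Image

from yolo.darknet import Darknet
from yolo.utils import load_classes, write_results   # used the same way in detector.py

model = Darknet("yolo/cfg/yolov3.cfg")
model.load_weights("yolo/yolov3.weights")   # assumed downloaded; not part of this commit
model.net_info["height"] = 416              # detector.py overrides the cfg size the same way
model.eval()

classes = load_classes("yolo/data/coco.names")

# Resize, HWC -> CHW, scale to [0, 1]  (cf. get_test_input_normal() in darknet.py)
img = Image.open("dog-cycle-car.png").convert("RGB").resize((416, 416))
x = torch.from_numpy(np.asarray(img).transpose(2, 0, 1)[np.newaxis] / 255.0).float()

with torch.no_grad():
    pred = model(x, False)                   # Darknet.forward(x, CUDA)
dets = write_results(pred, 0.5, 80, nms=True, nms_conf=0.4)
if not isinstance(dets, int):                # write_results returns an int when nothing is found
    print({classes[int(d[-1])] for d in dets})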
yolo/__pycache__/darknet.cpython-37.pyc
ADDED
Binary file (11.9 kB)

yolo/__pycache__/model.cpython-37.pyc
ADDED
Binary file (4.87 kB)

yolo/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (7.7 kB)
yolo/cfg/yolov3.cfg
ADDED
@@ -0,0 +1,788 @@
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=16
width=624
height=624
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear

[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear

[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear

[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
yolo/darknet.py
ADDED
@@ -0,0 +1,586 @@
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from PIL import Image

from yolo.utils import *

# from utils import *

def get_test_input_normal():

    input_image = "dog-cycle-car.png"
    image = Image.open(input_image)
    image = image.convert("RGB")

    img = image.resize((416, 416))

    img = np.asarray(img)

    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # RGB -> BGR | H x W x C -> C x H x W
    img_ = img_[np.newaxis, :, :, :] / 255.0     # Add a batch dimension | normalise
    img_ = torch.from_numpy(img_).float()        # Convert to float
    img_ = Variable(img_)                        # Convert to Variable
    return img_

def get_test_input():
    import cv2  # local import: only this helper needs OpenCV
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (416, 416))            # Resize to the input dimension
    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB | H x W x C -> C x H x W
    img_ = img_[np.newaxis, :, :, :] / 255.0     # Add a batch dimension | normalise
    img_ = torch.from_numpy(img_).float()        # Convert to float
    img_ = Variable(img_)                        # Convert to Variable
    return img_

def parse_cfg(cfgfile):
    """
    Takes a configuration file

    Returns a list of blocks. Each block describes a block in the neural
    network to be built. A block is represented as a dictionary in the list.
    """
    file = open(cfgfile, 'r')
    lines = file.read().split('\n')              # store the lines in a list
    lines = [x for x in lines if len(x) > 0]     # get rid of the empty lines
    lines = [x for x in lines if x[0] != '#']    # get rid of comments
    lines = [x.rstrip().lstrip() for x in lines]

    block = {}
    blocks = []

    for line in lines:
        if line[0] == "[":                       # This marks the start of a new block
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            key, value = line.split("=")
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)

    return blocks
    # print('\n\n'.join([repr(x) for x in blocks]))

import pickle as pkl

class MaxPoolStride1(nn.Module):
    def __init__(self, kernel_size):
        super(MaxPoolStride1, self).__init__()
        self.kernel_size = kernel_size
        self.pad = kernel_size - 1

    def forward(self, x):
        padded_x = F.pad(x, (0, self.pad, 0, self.pad), mode="replicate")
        pooled_x = nn.MaxPool2d(self.kernel_size, self.pad)(padded_x)
        return pooled_x

class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer, self).__init__()

class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors

    def forward(self, x, inp_dim, num_classes, confidence):
        x = x.data
        global CUDA
        prediction = x
        prediction = predict_transform(prediction, inp_dim, self.anchors, num_classes, confidence, CUDA)
        return prediction

class Upsample(nn.Module):
    def __init__(self, stride=2):
        super(Upsample, self).__init__()
        self.stride = stride

    def forward(self, x):
        stride = self.stride
        assert(x.data.dim() == 4)
        B = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        ws = stride
        hs = stride
        x = x.view(B, C, H, 1, W, 1).expand(B, C, H, stride, W, stride).contiguous().view(B, C, H*stride, W*stride)
        return x

class ReOrgLayer(nn.Module):
    def __init__(self, stride=2):
        super(ReOrgLayer, self).__init__()
        self.stride = stride

    def forward(self, x):
        assert(x.data.dim() == 4)
        B, C, H, W = x.data.shape
        hs = self.stride
        ws = self.stride
        assert(H % hs == 0), "The stride " + str(self.stride) + " is not a proper divisor of height " + str(H)
        assert(W % ws == 0), "The stride " + str(self.stride) + " is not a proper divisor of width " + str(W)
        x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(-2, -3).contiguous()
        x = x.view(B, C, H // hs * W // ws, hs, ws)
        x = x.view(B, C, H // hs * W // ws, hs*ws).transpose(-1, -2).contiguous()
        x = x.view(B, C, ws*hs, H // ws, W // ws).transpose(1, 2).contiguous()
        x = x.view(B, C*ws*hs, H // ws, W // ws)
        return x

def create_modules(blocks):
    net_info = blocks[0]     # Captures the information about the input and pre-processing

    module_list = nn.ModuleList()

    index = 0    # indexing blocks helps with implementing route layers (skip connections)

    prev_filters = 3

    output_filters = []

    for x in blocks:
        module = nn.Sequential()

        if (x["type"] == "net"):
            continue

        # If it's a convolutional layer
        if (x["type"] == "convolutional"):
            # Get the info about the layer
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False
            except:
                batch_normalize = 0
                bias = True

            filters = int(x["filters"])
            padding = int(x["pad"])
            kernel_size = int(x["size"])
            stride = int(x["stride"])

            if padding:
                pad = (kernel_size - 1) // 2
            else:
                pad = 0

            # Add the convolutional layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)

            # Add the batch norm layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)

            # Check the activation.
            # It is either linear or a leaky ReLU for YOLO
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)
                module.add_module("leaky_{0}".format(index), activn)

        # If it's an upsampling layer
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
            # upsample = Upsample(stride)
            upsample = nn.Upsample(scale_factor=2, mode="nearest")
            module.add_module("upsample_{}".format(index), upsample)

        # If it is a route layer
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(',')

            # Start of a route
            start = int(x["layers"][0])

            # end, if there exists one.
            try:
                end = int(x["layers"][1])
            except:
                end = 0

            # Positive annotation
            if start > 0:
                start = start - index

            if end > 0:
                end = end - index

            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)

            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        # shortcut corresponds to a skip connection
        elif x["type"] == "shortcut":
            from_ = int(x["from"])
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)

        elif x["type"] == "maxpool":
            stride = int(x["stride"])
            size = int(x["size"])
            if stride != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                maxpool = MaxPoolStride1(size)

            module.add_module("maxpool_{}".format(index), maxpool)

        # Yolo is the detection layer
        elif x["type"] == "yolo":
            mask = x["mask"].split(",")
            mask = [int(x) for x in mask]

            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]

            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)

        else:
            print("Something I dunno")
            assert False

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
        index += 1

    return (net_info, module_list)

class Darknet(nn.Module):
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.seen = 0

    def get_blocks(self):
        return self.blocks

    def get_module_list(self):
        return self.module_list

    def forward(self, x, CUDA):
        detections = []
        modules = self.blocks[1:]
        outputs = {}   # We cache the outputs for the route layer

        write = 0
        for i in range(len(modules)):

            module_type = (modules[i]["type"])
            if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":

                x = self.module_list[i](x)
                outputs[i] = x

            elif module_type == "route":
                layers = modules[i]["layers"]
                layers = [int(a) for a in layers]

                if (layers[0]) > 0:
                    layers[0] = layers[0] - i

                if len(layers) == 1:
                    x = outputs[i + (layers[0])]

                else:
                    if (layers[1]) > 0:
                        layers[1] = layers[1] - i

                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]

                    x = torch.cat((map1, map2), 1)
                outputs[i] = x

            elif module_type == "shortcut":
                from_ = int(modules[i]["from"])
                x = outputs[i-1] + outputs[i+from_]
                outputs[i] = x

            elif module_type == 'yolo':

                anchors = self.module_list[i][0].anchors
                # Get the input dimensions
                inp_dim = int(self.net_info["height"])

                # Get the number of classes
                num_classes = int(modules[i]["classes"])

                # Output the result
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)

                if type(x) == int:
                    continue

                if not write:
                    detections = x
                    write = 1

                else:
                    detections = torch.cat((detections, x), 1)

                outputs[i] = outputs[i-1]

        try:
            return detections
        except:
            return 0

    def load_weights_url(self, weightfile):

        # Open the weights file (fetched from S3 as an in-memory buffer)
        fp = get_data_s3(weightfile)

        # The first 5 values are header information
        # 1. Major version number
        # 2. Minor version number
        # 3. Subversion number
        # 4,5. Images seen by the network (during training)
        header = np.frombuffer(fp.getvalue(), dtype=np.int32, count=5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        # offset=20 skips the 5 int32 header values read above
        weights = np.frombuffer(fp.getvalue(), dtype=np.float32, offset=20)

        ptr = 0

        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]

            # If module_type is convolutional, load weights
            # Otherwise ignore.

            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0

                conv = model[0]

                if (batch_normalize):
                    bn = model[1]

                    # Get the number of weights of the batch norm layer
                    num_bn_biases = bn.bias.numel()

                    # Load the weights
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    # Cast the loaded weights into the dims of the model weights.
                    bn_biases = bn_biases.view_as(bn.bias.data)
                    bn_weights = bn_weights.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)

                    # Copy the data to the model
                    bn.bias.data.copy_(bn_biases)
                    bn.weight.data.copy_(bn_weights)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)

                else:

                    # Number of biases
                    num_biases = conv.bias.numel()

                    # Load the weights
                    conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
                    ptr = ptr + num_biases

                    # Reshape the loaded weights according to the dims of the model weights
                    conv_biases = conv_biases.view_as(conv.bias.data)

                    # Finally copy the data
                    conv.bias.data.copy_(conv_biases)

                # Let us load the weights for the convolutional layers
                num_weights = conv.weight.numel()

                # Do the same as above for weights
                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
                ptr = ptr + num_weights

                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)

    def load_weights(self, weightfile):

        # Open the weights file
        fp = open(weightfile, "rb")

        # The first 5 values are header information
        # 1. Major version number
        # 2. Minor version number
        # 3. Subversion number
        # 4,5. Images seen by the network (during training)
        header = np.fromfile(fp, dtype=np.int32, count=5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        weights = np.fromfile(fp, dtype=np.float32)

        ptr = 0

        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]

            # If module_type is convolutional, load weights
            # Otherwise ignore.

            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0

                conv = model[0]

                if (batch_normalize):
                    bn = model[1]

                    # Get the number of weights of the batch norm layer
                    num_bn_biases = bn.bias.numel()

                    # Load the weights
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr += num_bn_biases

                    # Cast the loaded weights into the dims of the model weights.
                    bn_biases = bn_biases.view_as(bn.bias.data)
                    bn_weights = bn_weights.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)

                    # Copy the data to the model
                    bn.bias.data.copy_(bn_biases)
                    bn.weight.data.copy_(bn_weights)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)

                else:

                    # Number of biases
                    num_biases = conv.bias.numel()

                    # Load the weights
                    conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
                    ptr = ptr + num_biases

                    # Reshape the loaded weights according to the dims of the model weights
                    conv_biases = conv_biases.view_as(conv.bias.data)

                    # Finally copy the data
                    conv.bias.data.copy_(conv_biases)

                # Let us load the weights for the convolutional layers
                num_weights = conv.weight.numel()

                # Do the same as above for weights
                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
                ptr = ptr + num_weights

                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)

if __name__ == '__main__':

    model = Darknet("yolov3.cfg")
    model.load_weights_url("yolov3.weights")

    CUDA = torch.cuda.is_available()

    print(' cuda : ', CUDA)

    inp = get_test_input()

    # if CUDA:
    #     model.cuda()
    #     inp.cuda()

    pred = model(inp, False)

    print(pred)
    print('shape', pred.shape)
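For reference, parse_cfg simply turns every bracketed section of the cfg file into a dict of strings, and create_modules walks that list to build one nn.Sequential per layer. A small illustration, run from the repo root against the cfg committed above (printed values abridged):

from yolo.darknet import parse_cfg, create_modules

blocks = parse_cfg("yolo/cfg/yolov3.cfg")
print(blocks[0]["type"], blocks[0]["width"], blocks[0]["height"])   # net 624 624
print(blocks[1])   # {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', ...}

net_info, module_list = create_modules(blocks)
print(len(blocks) - 1, len(module_list))   # one module per block after [net]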
yolo/data/coco.names
ADDED
@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
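detector.py reads this list through load_classes() from yolo/utils.py (not reproduced in this view); functionally it is just one name per line, where a line's zero-based index is the class id the network predicts. A minimal stand-in that behaves the same way:

def load_class_names(namesfile="yolo/data/coco.names"):
    # Stand-in for load_classes() in yolo/utils.py: one class name per line,
    # list index == class id coming out of write_results().
    with open(namesfile) as f:
        return [line.strip() for line in f if line.strip()]

names = load_class_names()
print(len(names), names[0], names[16])   # 80 person dog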
yolo/det/det_Poster3.jpg
ADDED
yolo/det/det_dog-cycle-car.png
ADDED
yolo/det/det_sample.jpeg
ADDED
yolo/det/det_victoria.jpg
ADDED
yolo/detector.py
ADDED
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import division
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
from torch.autograd import Variable
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
from utils import *
|
9 |
+
import argparse
|
10 |
+
import os
|
11 |
+
import os.path as osp
|
12 |
+
from darknet import Darknet
|
13 |
+
# from preprocess import prep_image, inp_to_image
|
14 |
+
import pandas as pd
|
15 |
+
import random
|
16 |
+
import pickle as pkl
|
17 |
+
import itertools
|
18 |
+
|
19 |
+
class test_net(nn.Module):
|
20 |
+
def __init__(self, num_layers, input_size):
|
21 |
+
super(test_net, self).__init__()
|
22 |
+
self.num_layers= num_layers
|
23 |
+
self.linear_1 = nn.Linear(input_size, 5)
|
24 |
+
self.middle = nn.ModuleList([nn.Linear(5,5) for x in range(num_layers)])
|
25 |
+
self.output = nn.Linear(5,2)
|
26 |
+
|
27 |
+
def forward(self, x):
|
28 |
+
x = x.view(-1)
|
29 |
+
fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
|
30 |
+
return fwd(x)
|
31 |
+
|
32 |
+
def get_test_input(input_dim, CUDA):
|
33 |
+
img = cv2.imread("dog-cycle-car.png")
|
34 |
+
img = cv2.resize(img, (input_dim, input_dim))
|
35 |
+
img_ = img[:,:,::-1].transpose((2,0,1))
|
36 |
+
img_ = img_[np.newaxis,:,:,:]/255.0
|
37 |
+
img_ = torch.from_numpy(img_).float()
|
38 |
+
img_ = Variable(img_)
|
39 |
+
|
40 |
+
if CUDA:
|
41 |
+
img_ = img_.cuda()
|
42 |
+
num_classes
|
43 |
+
return img_
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
def arg_parse():
|
48 |
+
"""
|
49 |
+
Parse arguements to the detect module
|
50 |
+
|
51 |
+
"""
|
52 |
+
|
53 |
+
|
54 |
+
parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
|
55 |
+
|
56 |
+
parser.add_argument("--images", dest = 'images', help =
|
57 |
+
"Image / Directory containing images to perform detection upon",
|
58 |
+
default = "imgs", type = str)
|
59 |
+
parser.add_argument("--det", dest = 'det', help =
|
60 |
+
"Image / Directory to store detections to",
|
61 |
+
default = "det", type = str)
|
62 |
+
parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
|
63 |
+
parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
|
64 |
+
parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
|
65 |
+
parser.add_argument("--cfg", dest = 'cfgfile', help =
|
66 |
+
"Config file",
|
67 |
+
default = "cfg/yolov3.cfg", type = str)
|
68 |
+
parser.add_argument("--weights", dest = 'weightsfile', help =
|
69 |
+
"weightsfile",
|
70 |
+
default = "yolov3.weights", type = str)
|
71 |
+
parser.add_argument("--reso", dest = 'reso', help =
|
72 |
+
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
|
73 |
+
default = "416", type = str)
|
74 |
+
parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
|
75 |
+
default = "1,2,3", type = str)
|
76 |
+
|
77 |
+
return parser.parse_args()
|
78 |
+
|
79 |
+
if __name__ == '__main__':
|
80 |
+
args = arg_parse()
|
81 |
+
|
82 |
+
scales = args.scales
|
83 |
+
|
84 |
+
|
85 |
+
# scales = [int(x) for x in scales.split(',')]
|
86 |
+
#
|
87 |
+
#
|
88 |
+
#
|
89 |
+
# args.reso = int(args.reso)
|
90 |
+
#
|
91 |
+
# num_boxes = [args.reso//32, args.reso//16, args.reso//8]
|
92 |
+
# scale_indices = [3*(x**2) for x in num_boxes]
|
93 |
+
# scale_indices = list(itertools.accumulate(scale_indices, lambda x,y : x+y))
|
94 |
+
#
|
95 |
+
#
|
96 |
+
# li = []
|
97 |
+
# i = 0
|
98 |
+
# for scale in scale_indices:
|
99 |
+
# li.extend(list(range(i, scale)))
|
100 |
+
# i = scale
|
101 |
+
#
|
102 |
+
# scale_indices = li
|
103 |
+
|
104 |
+
images = args.images
|
105 |
+
batch_size = int(args.bs)
|
106 |
+
confidence = float(args.confidence)
|
107 |
+
nms_thesh = float(args.nms_thresh)
|
108 |
+
start = 0
|
109 |
+
|
110 |
+
CUDA = torch.cuda.is_available()
|
111 |
+
|
112 |
+
num_classes = 80
|
113 |
+
classes = load_classes('data/coco.names')
|
114 |
+
|
115 |
+
#Set up the neural network
|
116 |
+
print("Loading network.....")
|
117 |
+
model = Darknet(args.cfgfile)
|
118 |
+
model.load_weights(args.weightsfile)
|
119 |
+
print("Network successfully loaded")
|
120 |
+
|
121 |
+
model.net_info["height"] = args.reso
|
122 |
+
inp_dim = int(model.net_info["height"])
|
123 |
+
assert inp_dim % 32 == 0
|
124 |
+
assert inp_dim > 32
|
125 |
+
|
126 |
+
#If there's a GPU availible, put the model on GPU
|
127 |
+
if CUDA:
|
128 |
+
model.cuda()
|
129 |
+
|
130 |
+
|
131 |
+
#Set the model in evaluation mode
|
132 |
+
model.eval()
|
133 |
+
|
134 |
+
read_dir = time.time()
|
135 |
+
#Detection phase
|
136 |
+
try:
|
137 |
+
imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
|
138 |
+
except NotADirectoryError:
|
139 |
+
imlist = []
|
140 |
+
imlist.append(osp.join(osp.realpath('.'), images))
|
141 |
+
except FileNotFoundError:
|
142 |
+
print ("No file or directory with the name {}".format(images))
|
143 |
+
exit()
|
144 |
+
|
145 |
+
if not os.path.exists(args.det):
|
146 |
+
os.makedirs(args.det)
|
147 |
+
|
148 |
+
load_batch = time.time()
|
149 |
+
|
150 |
+
batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
|
151 |
+
im_batches = [x[0] for x in batches]
|
152 |
+
orig_ims = [x[1] for x in batches]
|
153 |
+
im_dim_list = [x[2] for x in batches]
|
154 |
+
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
if CUDA:
|
159 |
+
im_dim_list = im_dim_list.cuda()
|
160 |
+
|
161 |
+
leftover = 0
|
162 |
+
|
163 |
+
if (len(im_dim_list) % batch_size):
|
164 |
+
leftover = 1
|
165 |
+
|
166 |
+
|
167 |
+
if batch_size != 1:
|
168 |
+
num_batches = len(imlist) // batch_size + leftover
|
169 |
+
im_batches = [torch.cat((im_batches[i*batch_size : min((i + 1)*batch_size,
|
170 |
+
len(im_batches))])) for i in range(num_batches)]
|
171 |
+
|
172 |
+
|
173 |
+
i = 0
|
174 |
+
|
175 |
+
|
176 |
+
write = False
|
177 |
+
model(get_test_input(inp_dim, CUDA), CUDA)
|
178 |
+
|
179 |
+
start_det_loop = time.time()
|
180 |
+
|
181 |
+
objs = {}
|
182 |
+
|
183 |
+
|
184 |
+
|
185 |
+
for batch in im_batches:
|
186 |
+
#load the image
|
187 |
+
start = time.time()
|
188 |
+
if CUDA:
|
189 |
+
batch = batch.cuda()
|
190 |
+
|
191 |
+
|
192 |
+
#Apply offsets to the result predictions
|
193 |
+
#Tranform the predictions as described in the YOLO paper
|
194 |
+
#flatten the prediction vector
|
195 |
+
# B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
|
196 |
+
# Put every proposed box as a row.
|
197 |
+
with torch.no_grad():
|
198 |
+
prediction = model(Variable(batch), CUDA)
|
199 |
+
|
200 |
+
# prediction = prediction[:,scale_indices]
|
201 |
+
|
202 |
+
|
203 |
+
#get the boxes with object confidence > threshold
|
204 |
+
#Convert the cordinates to absolute coordinates
|
205 |
+
#perform NMS on these boxes, and save the results
|
206 |
+
#I could have done NMS and saving seperately to have a better abstraction
|
207 |
+
#But both these operations require looping, hence
|
208 |
+
#clubbing these ops in one loop instead of two.
|
209 |
+
#loops are slower than vectorised operations.
|
210 |
+
|
211 |
+
prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
|
212 |
+
|
213 |
+
|
214 |
+
if type(prediction) == int:
|
215 |
+
i += 1
|
216 |
+
continue
|
217 |
+
|
218 |
+
end = time.time()
|
219 |
+
|
220 |
+
# print(end - start)
|
221 |
+
|
222 |
+
prediction[:,0] += i*batch_size
|
223 |
+
|
224 |
+
if not write:
|
225 |
+
output = prediction
|
226 |
+
write = 1
|
227 |
+
else:
|
228 |
+
output = torch.cat((output,prediction))
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
for im_num, image in enumerate(imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]):
|
234 |
+
im_id = i*batch_size + im_num
|
235 |
+
objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
|
236 |
+
print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
|
237 |
+
print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
|
238 |
+
print("----------------------------------------------------------")
|
239 |
+
i += 1
|
240 |
+
|
241 |
+
|
242 |
+
if CUDA:
|
243 |
+
torch.cuda.synchronize()
|
244 |
+
|
245 |
+
try:
|
246 |
+
output
|
247 |
+
except NameError:
|
248 |
+
print("No detections were made")
|
249 |
+
exit()
|
250 |
+
|
251 |
+
im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
|
252 |
+
|
253 |
+
scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
|
254 |
+
|
255 |
+
|
256 |
+
output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
|
257 |
+
output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
|
258 |
+
|
259 |
+
|
260 |
+
|
261 |
+
output[:,1:5] /= scaling_factor
|
262 |
+
|
263 |
+
for i in range(output.shape[0]):
|
264 |
+
output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
|
265 |
+
output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
|
266 |
+
|
267 |
+
|
268 |
+
output_recast = time.time()
|
269 |
+
|
270 |
+
class_load = time.time()
|
271 |
+
|
272 |
+
colors = pkl.load(open("pallete", "rb"))
|
273 |
+
|
274 |
+
draw = time.time()
|
275 |
+
|
276 |
+
def write(x, batches, results):
|
277 |
+
c1 = tuple(x[1:3].int())
|
278 |
+
c2 = tuple(x[3:5].int())
|
279 |
+
img = results[int(x[0])]
|
280 |
+
|
281 |
+
print( 'img' , int( x[0] ) )
|
282 |
+
print( 'cls' , int( x[-1] ) )
|
283 |
+
|
284 |
+
cls = int(x[-1])
|
285 |
+
label = "{0}".format(classes[cls])
|
286 |
+
color = random.choice(colors)
|
287 |
+
cv2.rectangle(img, c1, c2,color, 1)
|
288 |
+
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
|
289 |
+
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
|
290 |
+
cv2.rectangle(img, c1, c2,color, -1)
|
291 |
+
cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
|
292 |
+
return img
|
293 |
+
|
294 |
+
|
295 |
+
list(map(lambda x: write(x, im_batches, orig_ims), output))
|
296 |
+
|
297 |
+
det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))
|
298 |
+
|
299 |
+
print('det_names ',det_names)
|
300 |
+
print('orig_ims ',orig_ims[0].shape)
|
301 |
+
print('output : ',output)
|
302 |
+
|
303 |
+
list(map(cv2.imwrite, det_names, orig_ims))
|
304 |
+
|
305 |
+
end = time.time()
|
306 |
+
|
307 |
+
print()
|
308 |
+
print("SUMMARY")
|
309 |
+
print("----------------------------------------------------------")
|
310 |
+
print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
|
311 |
+
print()
|
312 |
+
print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
|
313 |
+
print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
|
314 |
+
print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
|
315 |
+
print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
|
316 |
+
print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
|
317 |
+
print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
|
318 |
+
print("----------------------------------------------------------")
|
319 |
+
|
320 |
+
|
321 |
+
torch.cuda.empty_cache()
|
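A minimal sketch (not part of the commit) of the coordinate rescaling the detector loop above performs: boxes predicted on the 416x416 letterboxed canvas are shifted by the letterbox padding, divided by the scale factor to land in original-image pixels, and clamped. The image size and box values below are made-up examples.

import torch

inp_dim = 416                                   # network input size used above
img_w, img_h = 768, 576                         # hypothetical original image size
box = torch.tensor([100., 120., 300., 360.])    # x1, y1, x2, y2 on the letterboxed canvas

scale = min(inp_dim / img_w, inp_dim / img_h)   # same factor letterbox_image applies
pad_x = (inp_dim - scale * img_w) / 2           # horizontal padding added by letterboxing
pad_y = (inp_dim - scale * img_h) / 2           # vertical padding
box[[0, 2]] = (box[[0, 2]] - pad_x) / scale     # undo padding, then undo scaling
box[[1, 3]] = (box[[1, 3]] - pad_y) / scale
box[[0, 2]] = box[[0, 2]].clamp(0, img_w)       # keep the box inside the original image
box[[1, 3]] = box[[1, 3]].clamp(0, img_h)
print(box)                                      # box in original-image pixel coordinates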
yolo/dog-cycle-car.png
ADDED
yolo/model.py
ADDED
@@ -0,0 +1,189 @@
1 |
+
from __future__ import division
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
from torch.autograd import Variable
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
from yolo.utils import *
|
9 |
+
import argparse
|
10 |
+
import os
|
11 |
+
import os.path as osp
|
12 |
+
from yolo.darknet import Darknet
|
13 |
+
# from preprocess import prep_image, inp_to_image
|
14 |
+
import pandas as pd
|
15 |
+
import random
|
16 |
+
import pickle as pkl
|
17 |
+
import itertools
|
18 |
+
import os
|
19 |
+
import base64
|
20 |
+
from PIL import Image
|
21 |
+
from io import BytesIO
|
22 |
+
|
23 |
+
class yolo_model():
|
24 |
+
|
25 |
+
|
26 |
+
batch_size = int(1)
|
27 |
+
confidence = float(0.5)
|
28 |
+
nms_thesh = float(0.4)
|
29 |
+
reso = 416
|
30 |
+
start = 0
|
31 |
+
|
32 |
+
CUDA = torch.cuda.is_available()
|
33 |
+
|
34 |
+
num_classes = 80
|
35 |
+
|
36 |
+
|
37 |
+
def __init__(self):
|
38 |
+
|
39 |
+
self.classes = load_classes( os.path.join( 'yolo' , 'data', 'coco.names' ) )
|
40 |
+
|
41 |
+
# self.colors = pkl.load( get_data_s3( "pallete" ) )
|
42 |
+
|
43 |
+
# Set up the neural network
|
44 |
+
|
45 |
+
self.model = Darknet( os.path.join( 'yolo' , 'yolov3-tiny.cfg' ) )
|
46 |
+
self.model.load_weights( os.path.join( 'yolo' , 'yolov3-tiny.weights' ) )
|
47 |
+
print(' [*] Model Loaded Successfully')
|
48 |
+
|
49 |
+
# set model resolution
|
50 |
+
|
51 |
+
self.model.net_info["height"] = self.reso
|
52 |
+
self.inp_dim = int(self.model.net_info["height"])
|
53 |
+
|
54 |
+
assert self.inp_dim % 32 == 0
|
55 |
+
assert self.inp_dim > 32
|
56 |
+
|
57 |
+
# If there's a GPU available, put the model on GPU
|
58 |
+
if self.CUDA:
|
59 |
+
self.model.cuda()
|
60 |
+
|
61 |
+
# Set the model in evaluation mode
|
62 |
+
self.model.eval()
|
63 |
+
|
64 |
+
def write( self , x , batches , results , colors=[] ):
|
65 |
+
c1 = tuple(x[1:3].int())
|
66 |
+
c2 = tuple(x[3:5].int())
|
67 |
+
img = results[int(x[0])]
|
68 |
+
|
69 |
+
print( 'img' , int( x[0] ) )
|
70 |
+
print( 'cls' , int( x[-1] ) )
|
71 |
+
|
72 |
+
cls = int(x[-1])
|
73 |
+
label = "{0}".format(self.classes[cls])
|
74 |
+
color = random.choice(colors)
|
75 |
+
cv2.rectangle(img, c1, c2,color, 1)
|
76 |
+
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
|
77 |
+
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
|
78 |
+
cv2.rectangle(img, c1, c2,color, -1)
|
79 |
+
cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
|
80 |
+
return img
|
81 |
+
|
82 |
+
def img_to_base64_str(self,img):
|
83 |
+
buffered = BytesIO()
|
84 |
+
img.save(buffered, format="PNG")
|
85 |
+
buffered.seek(0)
|
86 |
+
img_byte = buffered.getvalue()
|
87 |
+
img_str = "data:image/png;base64," + base64.b64encode(img_byte).decode()
|
88 |
+
return img_str
|
89 |
+
|
90 |
+
|
91 |
+
def predict( self , image ):
|
92 |
+
|
93 |
+
imlist = []
|
94 |
+
imlist.append( image )
|
95 |
+
|
96 |
+
batches = list( map( prep_image_org , imlist , [ self.inp_dim for x in range( len(imlist) ) ] ) )
|
97 |
+
im_batches = [x[0] for x in batches]
|
98 |
+
orig_ims = [x[1] for x in batches]
|
99 |
+
im_dim_list = [x[2] for x in batches]
|
100 |
+
|
101 |
+
print( 'im_dim_list : ' , im_dim_list )
|
102 |
+
|
103 |
+
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
|
104 |
+
|
105 |
+
if self.CUDA:
|
106 |
+
im_dim_list = im_dim_list.cuda()
|
107 |
+
|
108 |
+
print('im_batches' , len(im_batches))
|
109 |
+
|
110 |
+
batch = im_batches[0]
|
111 |
+
|
112 |
+
if self.CUDA:
|
113 |
+
batch = batch.cuda()
|
114 |
+
|
115 |
+
|
116 |
+
#Apply offsets to the result predictions
|
117 |
+
#Transform the predictions as described in the YOLO paper
|
118 |
+
#flatten the prediction vector
|
119 |
+
# B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
|
120 |
+
# Put every proposed box as a row.
|
121 |
+
with torch.no_grad():
|
122 |
+
prediction = self.model(Variable(batch), self.CUDA)
|
123 |
+
|
124 |
+
# prediction = prediction[:,scale_indices]
|
125 |
+
|
126 |
+
|
127 |
+
#get the boxes with object confidence > threshold
|
128 |
+
#Convert the coordinates to absolute coordinates
|
129 |
+
#perform NMS on these boxes, and save the results
|
130 |
+
#I could have done NMS and saving separately to have a better abstraction
|
131 |
+
#But both these operations require looping, hence
|
132 |
+
#clubbing these ops in one loop instead of two.
|
133 |
+
#loops are slower than vectorised operations.
|
134 |
+
|
135 |
+
prediction = write_results(prediction, self.confidence, self.num_classes, nms = True, nms_conf = self.nms_thesh)
|
136 |
+
|
137 |
+
end = time.time()
|
138 |
+
|
139 |
+
# print(end - start)
|
140 |
+
|
141 |
+
# prediction[:,0] += i*batch_size
|
142 |
+
|
143 |
+
output = prediction
|
144 |
+
|
145 |
+
# 1, 1, 1
|
146 |
+
# print( 'enumerate : ' , batch_size , len(imlist) , min( batch_size , len(imlist) ) )
|
147 |
+
|
148 |
+
for im_num, image in enumerate( imlist ):
|
149 |
+
im_id = im_num
|
150 |
+
objs = [self.classes[int(x[-1])] for x in output if int(x[0]) == im_id]
|
151 |
+
# print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - self.start)/self.batch_size))
|
152 |
+
print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
|
153 |
+
print("----------------------------------------------------------")
|
154 |
+
|
155 |
+
im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
|
156 |
+
|
157 |
+
scaling_factor = torch.min(self.inp_dim/im_dim_list,1)[0].view(-1,1)
|
158 |
+
|
159 |
+
output[:,[1,3]] -= (self.inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
|
160 |
+
output[:,[2,4]] -= (self.inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
|
161 |
+
|
162 |
+
output[:,1:5] /= scaling_factor
|
163 |
+
|
164 |
+
for i in range(output.shape[0]):
|
165 |
+
output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
|
166 |
+
output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
|
167 |
+
|
168 |
+
colors = pkl.load( open( "yolo/pallete", "rb") )
|
169 |
+
|
170 |
+
list(map(lambda x: self.write( x , im_batches , orig_ims , colors=colors ) , output ) )
|
171 |
+
|
172 |
+
print('orig_ims : shape ',orig_ims[0].shape)
|
173 |
+
# print('orig_ims : ',orig_ims[0])
|
174 |
+
|
175 |
+
output_image = Image.fromarray(orig_ims[0])
|
176 |
+
|
177 |
+
img_str = self.img_to_base64_str(output_image)
|
178 |
+
|
179 |
+
# im_bytes = orig_ims[0].tobytes()
|
180 |
+
# im_b64 = base64.b64encode(im_bytes)
|
181 |
+
|
182 |
+
# im_b64 = im_b64.decode('utf-8')
|
183 |
+
|
184 |
+
# print( 'im_b64' , im_b64 )
|
185 |
+
|
186 |
+
payload = dict({ 'image' : img_str , 'objects' : objs })
|
187 |
+
|
188 |
+
return payload,output_image
|
189 |
+
|
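A hedged usage sketch (not part of the commit) showing how yolo/model.py is intended to be driven, mirroring the call in app.py; the output filename below is hypothetical.

import cv2
from yolo.model import yolo_model

model = yolo_model()                          # loads yolov3-tiny.cfg and yolov3-tiny.weights once
frame = cv2.imread("yolo/dog-cycle-car.png")  # any numpy image with a .shape works here
payload, annotated = model.predict(frame)     # dict with base64 image + object names, and a PIL image
print(payload["objects"])                     # list of detected class names
annotated.save("det_output.png")              # hypothetical output path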
yolo/pallete
ADDED
Binary file (908 Bytes)
|
|
yolo/sample.jpeg
ADDED
yolo/sample.py
ADDED
@@ -0,0 +1,77 @@
1 |
+
|
2 |
+
import os
|
3 |
+
import boto3
|
4 |
+
from io import BytesIO , StringIO
|
5 |
+
import pickle as pkl
|
6 |
+
from utils import *
|
7 |
+
|
8 |
+
def get_data_s3(filename):
|
9 |
+
|
10 |
+
ACCESS_KEY = "AKIAUKUH7S3OIVOEIRWY"
|
11 |
+
SECRET_KEY = "89dABXdWDjGGuqFOx8nGR+ueShuaKZfCc4EV4AJr"
|
12 |
+
bucket = "root-models"
|
13 |
+
|
14 |
+
s3 = boto3.client( "s3" , aws_access_key_id=ACCESS_KEY , aws_secret_access_key=SECRET_KEY )
|
15 |
+
|
16 |
+
response = s3.get_object(Bucket=bucket, Key=filename)
|
17 |
+
|
18 |
+
data = BytesIO( response["Body"].read() )
|
19 |
+
|
20 |
+
return data
|
21 |
+
|
22 |
+
|
23 |
+
def parse_cfg_url(filename='yolov3.cfg'):
|
24 |
+
|
25 |
+
data = get_data_s3(filename)
|
26 |
+
|
27 |
+
lines = data.getvalue().decode().rstrip().lstrip().split('\n') #store the lines in a list
|
28 |
+
lines = [x.rstrip().lstrip() for x in lines]
|
29 |
+
|
30 |
+
lines = [x for x in lines if len(x) > 0] #get rid of the empty lines
|
31 |
+
lines = [x for x in lines if x[0] != '#']
|
32 |
+
lines = [x.rstrip().lstrip() for x in lines]
|
33 |
+
|
34 |
+
|
35 |
+
block = {}
|
36 |
+
blocks = []
|
37 |
+
|
38 |
+
for line in lines:
|
39 |
+
# print('line:' , line)
|
40 |
+
if line[0] == "[": #This marks the start of a new block
|
41 |
+
if len(block) != 0:
|
42 |
+
blocks.append(block)
|
43 |
+
block = {}
|
44 |
+
block["type"] = line[1:-1].rstrip()
|
45 |
+
else:
|
46 |
+
key,value = line.split("=")
|
47 |
+
block[key.rstrip()] = value.lstrip()
|
48 |
+
blocks.append(block)
|
49 |
+
|
50 |
+
# print('blocks : 2 ' , blocks )
|
51 |
+
|
52 |
+
return blocks
|
53 |
+
|
54 |
+
if __name__ == '__main__':
|
55 |
+
# parse_cfg('yolov3.cfg')
|
56 |
+
|
57 |
+
# parse_cfg_url('yolov3.cfg')
|
58 |
+
|
59 |
+
# colors = pkl.load( open( "pallete", "rb") )
|
60 |
+
|
61 |
+
# print(colors)
|
62 |
+
|
63 |
+
# print()
|
64 |
+
|
65 |
+
# colors = pkl.load( get_data_s3( "pallete" ) )
|
66 |
+
|
67 |
+
# print(colors)
|
68 |
+
|
69 |
+
classes = load_classes('data/coco.names')
|
70 |
+
|
71 |
+
print( classes )
|
72 |
+
|
73 |
+
print()
|
74 |
+
|
75 |
+
classes = load_classes_url('coco.names')
|
76 |
+
|
77 |
+
print( classes )
|
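A small illustration (not part of the commit) of the data shape parse_cfg_url above produces: a list of dicts, one per [section] of the Darknet cfg, with every value kept as a string. The two blocks below are truncated toy examples.

blocks = [
    {"type": "net", "batch": "1", "width": "416", "height": "416"},
    {"type": "convolutional", "filters": "16", "size": "3", "stride": "1"},
]
conv_layers = [b for b in blocks if b["type"] == "convolutional"]
print(len(conv_layers))   # -> 1 in this toy example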
yolo/test.py
ADDED
@@ -0,0 +1,166 @@
1 |
+
from __future__ import division
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
from torch.autograd import Variable
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
from utils import *
|
9 |
+
import argparse
|
10 |
+
import os
|
11 |
+
import os.path as osp
|
12 |
+
from darknet import Darknet
|
13 |
+
# from preprocess import prep_image, inp_to_image
|
14 |
+
import pandas as pd
|
15 |
+
import random
|
16 |
+
import pickle as pkl
|
17 |
+
import itertools
|
18 |
+
import os
|
19 |
+
|
20 |
+
if __name__ == '__main__':
|
21 |
+
|
22 |
+
images = os.path.join('victoria.jpg')
|
23 |
+
|
24 |
+
batch_size = int(1)
|
25 |
+
confidence = float(0.5)
|
26 |
+
nms_thesh = float(0.4)
|
27 |
+
reso = 416
|
28 |
+
start = 0
|
29 |
+
|
30 |
+
CUDA = torch.cuda.is_available()
|
31 |
+
|
32 |
+
num_classes = 80
|
33 |
+
classes = load_classes('data/coco.names')
|
34 |
+
|
35 |
+
#Set up the neural network
|
36 |
+
|
37 |
+
model = Darknet("yolov3.cfg")
|
38 |
+
model.load_weights("yolov3.weights")
|
39 |
+
print(' [*] Model Loaded Successfully')
|
40 |
+
|
41 |
+
# set model resolution
|
42 |
+
|
43 |
+
model.net_info["height"] = reso
|
44 |
+
inp_dim = int(model.net_info["height"])
|
45 |
+
|
46 |
+
assert inp_dim % 32 == 0
|
47 |
+
assert inp_dim > 32
|
48 |
+
|
49 |
+
# If there's a GPU available, put the model on GPU
|
50 |
+
if CUDA:
|
51 |
+
model.cuda()
|
52 |
+
|
53 |
+
# Set the model in evaluation mode
|
54 |
+
model.eval()
|
55 |
+
|
56 |
+
imlist = []
|
57 |
+
imlist.append( osp.join(osp.realpath('.') , images) )
|
58 |
+
|
59 |
+
batches = list( map( prep_image , imlist , [ inp_dim for x in range( len(imlist) ) ] ) )
|
60 |
+
im_batches = [x[0] for x in batches]
|
61 |
+
orig_ims = [x[1] for x in batches]
|
62 |
+
im_dim_list = [x[2] for x in batches]
|
63 |
+
|
64 |
+
print( 'im_dim_list : ' , im_dim_list )
|
65 |
+
|
66 |
+
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
|
67 |
+
|
68 |
+
print( 'im_dim_list : after' , im_dim_list )
|
69 |
+
|
70 |
+
if CUDA:
|
71 |
+
im_dim_list = im_dim_list.cuda()
|
72 |
+
|
73 |
+
print('im_batches' , len(im_batches))
|
74 |
+
|
75 |
+
batch = im_batches[0]
|
76 |
+
|
77 |
+
if CUDA:
|
78 |
+
batch = batch.cuda()
|
79 |
+
|
80 |
+
|
81 |
+
#Apply offsets to the result predictions
|
82 |
+
#Transform the predictions as described in the YOLO paper
|
83 |
+
#flatten the prediction vector
|
84 |
+
# B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
|
85 |
+
# Put every proposed box as a row.
|
86 |
+
with torch.no_grad():
|
87 |
+
prediction = model(Variable(batch), CUDA)
|
88 |
+
|
89 |
+
# prediction = prediction[:,scale_indices]
|
90 |
+
|
91 |
+
|
92 |
+
#get the boxes with object confidence > threshold
|
93 |
+
#Convert the coordinates to absolute coordinates
|
94 |
+
#perform NMS on these boxes, and save the results
|
95 |
+
#I could have done NMS and saving separately to have a better abstraction
|
96 |
+
#But both these operations require looping, hence
|
97 |
+
#clubbing these ops in one loop instead of two.
|
98 |
+
#loops are slower than vectorised operations.
|
99 |
+
|
100 |
+
prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
|
101 |
+
|
102 |
+
|
103 |
+
# if type(prediction) == int:
|
104 |
+
# continue
|
105 |
+
|
106 |
+
end = time.time()
|
107 |
+
|
108 |
+
# print(end - start)
|
109 |
+
|
110 |
+
# prediction[:,0] += i*batch_size
|
111 |
+
|
112 |
+
output = prediction
|
113 |
+
|
114 |
+
# 1, 1, 1
|
115 |
+
# print( 'enumerate : ' , batch_size , len(imlist) , min( batch_size , len(imlist) ) )
|
116 |
+
|
117 |
+
for im_num, image in enumerate( imlist ):
|
118 |
+
im_id = im_num
|
119 |
+
objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
|
120 |
+
print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
|
121 |
+
print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
|
122 |
+
print("----------------------------------------------------------")
|
123 |
+
|
124 |
+
im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
|
125 |
+
|
126 |
+
scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
|
127 |
+
|
128 |
+
output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
|
129 |
+
output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
|
130 |
+
|
131 |
+
output[:,1:5] /= scaling_factor
|
132 |
+
|
133 |
+
for i in range(output.shape[0]):
|
134 |
+
output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
|
135 |
+
output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
|
136 |
+
|
137 |
+
colors = pkl.load(open("pallete", "rb"))
|
138 |
+
|
139 |
+
def write(x, batches, results):
|
140 |
+
c1 = tuple(x[1:3].int())
|
141 |
+
c2 = tuple(x[3:5].int())
|
142 |
+
img = results[int(x[0])]
|
143 |
+
|
144 |
+
print( 'img' , int( x[0] ) )
|
145 |
+
print( 'cls' , int( x[-1] ) )
|
146 |
+
|
147 |
+
cls = int(x[-1])
|
148 |
+
label = "{0}".format(classes[cls])
|
149 |
+
color = random.choice(colors)
|
150 |
+
cv2.rectangle(img, c1, c2,color, 1)
|
151 |
+
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
|
152 |
+
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
|
153 |
+
cv2.rectangle(img, c1, c2,color, -1)
|
154 |
+
cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
|
155 |
+
return img
|
156 |
+
|
157 |
+
|
158 |
+
list(map(lambda x: write(x, im_batches, orig_ims), output))
|
159 |
+
|
160 |
+
det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format('det',x.split("/")[-1]))
|
161 |
+
|
162 |
+
print('det_names ',det_names)
|
163 |
+
print('orig_ims ',orig_ims[0].shape)
|
164 |
+
print('output : ',output)
|
165 |
+
|
166 |
+
list(map(cv2.imwrite, det_names, orig_ims))
|
yolo/utils.py
ADDED
@@ -0,0 +1,324 @@
1 |
+
from __future__ import division
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torch.autograd import Variable
|
7 |
+
import numpy as np
|
8 |
+
import cv2
|
9 |
+
import boto3
|
10 |
+
from io import BytesIO
|
11 |
+
|
12 |
+
def get_data_s3(filename):
|
13 |
+
|
14 |
+
ACCESS_KEY = "AKIAUKUH7S3OIVOEIRWY"
|
15 |
+
SECRET_KEY = "89dABXdWDjGGuqFOx8nGR+ueShuaKZfCc4EV4AJr"
|
16 |
+
bucket = "root-models"
|
17 |
+
|
18 |
+
s3 = boto3.client( "s3" , aws_access_key_id=ACCESS_KEY , aws_secret_access_key=SECRET_KEY )
|
19 |
+
|
20 |
+
response = s3.get_object(Bucket=bucket, Key=filename)
|
21 |
+
|
22 |
+
data = BytesIO( response["Body"].read() )
|
23 |
+
|
24 |
+
return data
|
25 |
+
|
26 |
+
def parse_cfg_url(filename='yolov3.cfg'):
|
27 |
+
|
28 |
+
data = get_data_s3(filename)
|
29 |
+
|
30 |
+
lines = data.getvalue().decode().rstrip().lstrip().split('\n') #store the lines in a list
|
31 |
+
lines = [x.rstrip().lstrip() for x in lines]
|
32 |
+
|
33 |
+
lines = [x for x in lines if len(x) > 0] #get rid of the empty lines
|
34 |
+
lines = [x for x in lines if x[0] != '#']
|
35 |
+
lines = [x.rstrip().lstrip() for x in lines]
|
36 |
+
|
37 |
+
|
38 |
+
block = {}
|
39 |
+
blocks = []
|
40 |
+
|
41 |
+
for line in lines:
|
42 |
+
# print('line:' , line)
|
43 |
+
if line[0] == "[": #This marks the start of a new block
|
44 |
+
if len(block) != 0:
|
45 |
+
blocks.append(block)
|
46 |
+
block = {}
|
47 |
+
block["type"] = line[1:-1].rstrip()
|
48 |
+
else:
|
49 |
+
key,value = line.split("=")
|
50 |
+
block[key.rstrip()] = value.lstrip()
|
51 |
+
blocks.append(block)
|
52 |
+
|
53 |
+
# print('blocks : 2 ' , blocks )
|
54 |
+
|
55 |
+
return blocks
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
|
60 |
+
batch_size = prediction.size(0)
|
61 |
+
stride = inp_dim // prediction.size(2)
|
62 |
+
grid_size = inp_dim // stride
|
63 |
+
bbox_attrs = 5 + num_classes
|
64 |
+
num_anchors = len(anchors)
|
65 |
+
|
66 |
+
anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
|
71 |
+
prediction = prediction.transpose(1,2).contiguous()
|
72 |
+
prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
|
73 |
+
|
74 |
+
|
75 |
+
#Sigmoid the centre_X, centre_Y, and object confidence
|
76 |
+
prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
|
77 |
+
prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
|
78 |
+
prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
#Add the center offsets
|
83 |
+
grid_len = np.arange(grid_size)
|
84 |
+
a,b = np.meshgrid(grid_len, grid_len)
|
85 |
+
|
86 |
+
x_offset = torch.FloatTensor(a).view(-1,1)
|
87 |
+
y_offset = torch.FloatTensor(b).view(-1,1)
|
88 |
+
|
89 |
+
if CUDA:
|
90 |
+
x_offset = x_offset.cuda()
|
91 |
+
y_offset = y_offset.cuda()
|
92 |
+
|
93 |
+
x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
|
94 |
+
|
95 |
+
prediction[:,:,:2] += x_y_offset
|
96 |
+
|
97 |
+
#log space transform height and the width
|
98 |
+
anchors = torch.FloatTensor(anchors)
|
99 |
+
|
100 |
+
if CUDA:
|
101 |
+
anchors = anchors.cuda()
|
102 |
+
|
103 |
+
anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
|
104 |
+
prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
|
105 |
+
|
106 |
+
#Sigmoid the class scores
|
107 |
+
prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
|
108 |
+
|
109 |
+
prediction[:,:,:4] *= stride
|
110 |
+
|
111 |
+
|
112 |
+
return prediction
|
113 |
+
|
114 |
+
def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
|
115 |
+
conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
|
116 |
+
prediction = prediction*conf_mask
|
117 |
+
|
118 |
+
|
119 |
+
try:
|
120 |
+
ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()
|
121 |
+
except:
|
122 |
+
return 0
|
123 |
+
|
124 |
+
|
125 |
+
box_a = prediction.new(prediction.shape)
|
126 |
+
box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
|
127 |
+
box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
|
128 |
+
box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
|
129 |
+
box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
|
130 |
+
prediction[:,:,:4] = box_a[:,:,:4]
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
batch_size = prediction.size(0)
|
135 |
+
|
136 |
+
output = prediction.new(1, prediction.size(2) + 1)
|
137 |
+
write = False
|
138 |
+
|
139 |
+
|
140 |
+
for ind in range(batch_size):
|
141 |
+
#select the image from the batch
|
142 |
+
image_pred = prediction[ind]
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
#Get the class having maximum score, and the index of that class
|
147 |
+
#Get rid of num_classes softmax scores
|
148 |
+
#Add the class index and the class score of class having maximum score
|
149 |
+
max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
|
150 |
+
max_conf = max_conf.float().unsqueeze(1)
|
151 |
+
max_conf_score = max_conf_score.float().unsqueeze(1)
|
152 |
+
seq = (image_pred[:,:5], max_conf, max_conf_score)
|
153 |
+
image_pred = torch.cat(seq, 1)
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
#Get rid of the zero entries
|
158 |
+
non_zero_ind = (torch.nonzero(image_pred[:,4]))
|
159 |
+
|
160 |
+
|
161 |
+
image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
|
162 |
+
|
163 |
+
#Get the various classes detected in the image
|
164 |
+
try:
|
165 |
+
img_classes = unique(image_pred_[:,-1])
|
166 |
+
except:
|
167 |
+
continue
|
168 |
+
#WE will do NMS classwise
|
169 |
+
for cls in img_classes:
|
170 |
+
#get the detections with one particular class
|
171 |
+
cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
|
172 |
+
class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
|
173 |
+
|
174 |
+
|
175 |
+
image_pred_class = image_pred_[class_mask_ind].view(-1,7)
|
176 |
+
|
177 |
+
|
178 |
+
|
179 |
+
#sort the detections such that the entry with the maximum objectness
|
180 |
+
#confidence is at the top
|
181 |
+
conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1]
|
182 |
+
image_pred_class = image_pred_class[conf_sort_index]
|
183 |
+
idx = image_pred_class.size(0)
|
184 |
+
|
185 |
+
#if nms has to be done
|
186 |
+
if nms:
|
187 |
+
#For each detection
|
188 |
+
for i in range(idx):
|
189 |
+
#Get the IOUs of all boxes that come after the one we are looking at
|
190 |
+
#in the loop
|
191 |
+
try:
|
192 |
+
ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
|
193 |
+
except ValueError:
|
194 |
+
break
|
195 |
+
|
196 |
+
except IndexError:
|
197 |
+
break
|
198 |
+
|
199 |
+
#Zero out all the detections that have IoU > threshold
|
200 |
+
iou_mask = (ious < nms_conf).float().unsqueeze(1)
|
201 |
+
image_pred_class[i+1:] *= iou_mask
|
202 |
+
|
203 |
+
#Remove the non-zero entries
|
204 |
+
non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
|
205 |
+
image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
|
206 |
+
|
207 |
+
|
208 |
+
|
209 |
+
#Concatenate the batch_id of the image to the detection
|
210 |
+
#this helps us identify which image the detection corresponds to
|
211 |
+
#We use a linear structure to hold ALL the detections from the batch
|
212 |
+
#the batch_dim is flattened
|
213 |
+
#batch is identified by extra batch column
|
214 |
+
|
215 |
+
|
216 |
+
batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
|
217 |
+
seq = batch_ind, image_pred_class
|
218 |
+
if not write:
|
219 |
+
output = torch.cat(seq,1)
|
220 |
+
write = True
|
221 |
+
else:
|
222 |
+
out = torch.cat(seq,1)
|
223 |
+
output = torch.cat((output,out))
|
224 |
+
|
225 |
+
try:
|
226 |
+
return output
|
227 |
+
except:
|
228 |
+
return 0
|
229 |
+
|
230 |
+
def unique(tensor):
|
231 |
+
tensor_np = tensor.cpu().numpy()
|
232 |
+
unique_np = np.unique(tensor_np)
|
233 |
+
unique_tensor = torch.from_numpy(unique_np)
|
234 |
+
|
235 |
+
tensor_res = tensor.new(unique_tensor.shape)
|
236 |
+
tensor_res.copy_(unique_tensor)
|
237 |
+
return tensor_res
|
238 |
+
|
239 |
+
def load_classes_url(namesfile):
|
240 |
+
fp = get_data_s3(namesfile)
|
241 |
+
names = fp.getvalue().decode().split("\n")[:-1]
|
242 |
+
return names
|
243 |
+
|
244 |
+
|
245 |
+
def load_classes(namesfile):
|
246 |
+
fp = open(namesfile, "r")
|
247 |
+
names = fp.read().split("\n")[:-1]
|
248 |
+
return names
|
249 |
+
|
250 |
+
def bbox_iou(box1, box2):
|
251 |
+
"""
|
252 |
+
Returns the IoU of two bounding boxes
|
253 |
+
|
254 |
+
|
255 |
+
"""
|
256 |
+
#Get the coordinates of bounding boxes
|
257 |
+
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
|
258 |
+
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
|
259 |
+
|
260 |
+
#get the coordinates of the intersection rectangle
|
261 |
+
inter_rect_x1 = torch.max(b1_x1, b2_x1)
|
262 |
+
inter_rect_y1 = torch.max(b1_y1, b2_y1)
|
263 |
+
inter_rect_x2 = torch.min(b1_x2, b2_x2)
|
264 |
+
inter_rect_y2 = torch.min(b1_y2, b2_y2)
|
265 |
+
|
266 |
+
#Intersection area
|
267 |
+
if torch.cuda.is_available():
|
268 |
+
inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda())
|
269 |
+
else:
|
270 |
+
inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape))
|
271 |
+
|
272 |
+
#Union Area
|
273 |
+
b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
|
274 |
+
b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)
|
275 |
+
|
276 |
+
iou = inter_area / (b1_area + b2_area - inter_area)
|
277 |
+
|
278 |
+
return iou
|
279 |
+
|
280 |
+
def letterbox_image(img, inp_dim):
|
281 |
+
'''resize image with unchanged aspect ratio using padding'''
|
282 |
+
img_w, img_h = img.shape[1], img.shape[0]
|
283 |
+
w, h = inp_dim
|
284 |
+
new_w = int(img_w * min(w/img_w, h/img_h))
|
285 |
+
new_h = int(img_h * min(w/img_w, h/img_h))
|
286 |
+
resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
|
287 |
+
|
288 |
+
canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
|
289 |
+
|
290 |
+
canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
|
291 |
+
|
292 |
+
return canvas
|
293 |
+
|
294 |
+
|
295 |
+
def prep_image_org(orig_im, inp_dim):
|
296 |
+
"""
|
297 |
+
Prepare image for inputting to the neural network.
|
298 |
+
|
299 |
+
Returns a Variable
|
300 |
+
"""
|
301 |
+
|
302 |
+
# orig_im = cv2.imread(img)
|
303 |
+
dim = orig_im.shape[1], orig_im.shape[0]
|
304 |
+
img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
|
305 |
+
img_ = img[:,:,::-1].transpose((2,0,1)).copy()
|
306 |
+
img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
|
307 |
+
return img_, orig_im, dim
|
308 |
+
|
309 |
+
|
310 |
+
|
311 |
+
def prep_image(img, inp_dim):
|
312 |
+
"""
|
313 |
+
Prepare image for inputting to the neural network.
|
314 |
+
|
315 |
+
Returns a Variable
|
316 |
+
"""
|
317 |
+
|
318 |
+
orig_im = cv2.imread(img)
|
319 |
+
dim = orig_im.shape[1], orig_im.shape[0]
|
320 |
+
img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
|
321 |
+
img_ = img[:,:,::-1].transpose((2,0,1)).copy()
|
322 |
+
img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
|
323 |
+
return img_, orig_im, dim
|
324 |
+
|
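A quick sketch (not part of the commit) exercising bbox_iou from yolo/utils.py on two hand-made corner-format boxes; the helper builds its comparison tensor on CUDA whenever a GPU is present, so the inputs are placed on the same device.

import torch
from yolo.utils import bbox_iou

dev = "cuda" if torch.cuda.is_available() else "cpu"
box1 = torch.tensor([[0., 0., 10., 10.]], device=dev)   # x1, y1, x2, y2
box2 = torch.tensor([[5., 5., 15., 15.]], device=dev)
print(bbox_iou(box1, box2))   # ~0.17 with the +1-pixel area convention used above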
yolo/victoria.jpg
ADDED
yolo/yolov3-tiny.cfg
ADDED
@@ -0,0 +1,182 @@
1 |
+
[net]
|
2 |
+
# Testing
|
3 |
+
batch=1
|
4 |
+
subdivisions=1
|
5 |
+
# Training
|
6 |
+
# batch=64
|
7 |
+
# subdivisions=2
|
8 |
+
width=416
|
9 |
+
height=416
|
10 |
+
channels=3
|
11 |
+
momentum=0.9
|
12 |
+
decay=0.0005
|
13 |
+
angle=0
|
14 |
+
saturation = 1.5
|
15 |
+
exposure = 1.5
|
16 |
+
hue=.1
|
17 |
+
|
18 |
+
learning_rate=0.001
|
19 |
+
burn_in=1000
|
20 |
+
max_batches = 500200
|
21 |
+
policy=steps
|
22 |
+
steps=400000,450000
|
23 |
+
scales=.1,.1
|
24 |
+
|
25 |
+
[convolutional]
|
26 |
+
batch_normalize=1
|
27 |
+
filters=16
|
28 |
+
size=3
|
29 |
+
stride=1
|
30 |
+
pad=1
|
31 |
+
activation=leaky
|
32 |
+
|
33 |
+
[maxpool]
|
34 |
+
size=2
|
35 |
+
stride=2
|
36 |
+
|
37 |
+
[convolutional]
|
38 |
+
batch_normalize=1
|
39 |
+
filters=32
|
40 |
+
size=3
|
41 |
+
stride=1
|
42 |
+
pad=1
|
43 |
+
activation=leaky
|
44 |
+
|
45 |
+
[maxpool]
|
46 |
+
size=2
|
47 |
+
stride=2
|
48 |
+
|
49 |
+
[convolutional]
|
50 |
+
batch_normalize=1
|
51 |
+
filters=64
|
52 |
+
size=3
|
53 |
+
stride=1
|
54 |
+
pad=1
|
55 |
+
activation=leaky
|
56 |
+
|
57 |
+
[maxpool]
|
58 |
+
size=2
|
59 |
+
stride=2
|
60 |
+
|
61 |
+
[convolutional]
|
62 |
+
batch_normalize=1
|
63 |
+
filters=128
|
64 |
+
size=3
|
65 |
+
stride=1
|
66 |
+
pad=1
|
67 |
+
activation=leaky
|
68 |
+
|
69 |
+
[maxpool]
|
70 |
+
size=2
|
71 |
+
stride=2
|
72 |
+
|
73 |
+
[convolutional]
|
74 |
+
batch_normalize=1
|
75 |
+
filters=256
|
76 |
+
size=3
|
77 |
+
stride=1
|
78 |
+
pad=1
|
79 |
+
activation=leaky
|
80 |
+
|
81 |
+
[maxpool]
|
82 |
+
size=2
|
83 |
+
stride=2
|
84 |
+
|
85 |
+
[convolutional]
|
86 |
+
batch_normalize=1
|
87 |
+
filters=512
|
88 |
+
size=3
|
89 |
+
stride=1
|
90 |
+
pad=1
|
91 |
+
activation=leaky
|
92 |
+
|
93 |
+
[maxpool]
|
94 |
+
size=2
|
95 |
+
stride=1
|
96 |
+
|
97 |
+
[convolutional]
|
98 |
+
batch_normalize=1
|
99 |
+
filters=1024
|
100 |
+
size=3
|
101 |
+
stride=1
|
102 |
+
pad=1
|
103 |
+
activation=leaky
|
104 |
+
|
105 |
+
###########
|
106 |
+
|
107 |
+
[convolutional]
|
108 |
+
batch_normalize=1
|
109 |
+
filters=256
|
110 |
+
size=1
|
111 |
+
stride=1
|
112 |
+
pad=1
|
113 |
+
activation=leaky
|
114 |
+
|
115 |
+
[convolutional]
|
116 |
+
batch_normalize=1
|
117 |
+
filters=512
|
118 |
+
size=3
|
119 |
+
stride=1
|
120 |
+
pad=1
|
121 |
+
activation=leaky
|
122 |
+
|
123 |
+
[convolutional]
|
124 |
+
size=1
|
125 |
+
stride=1
|
126 |
+
pad=1
|
127 |
+
filters=255
|
128 |
+
activation=linear
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
[yolo]
|
133 |
+
mask = 3,4,5
|
134 |
+
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
135 |
+
classes=80
|
136 |
+
num=6
|
137 |
+
jitter=.3
|
138 |
+
ignore_thresh = .7
|
139 |
+
truth_thresh = 1
|
140 |
+
random=1
|
141 |
+
|
142 |
+
[route]
|
143 |
+
layers = -4
|
144 |
+
|
145 |
+
[convolutional]
|
146 |
+
batch_normalize=1
|
147 |
+
filters=128
|
148 |
+
size=1
|
149 |
+
stride=1
|
150 |
+
pad=1
|
151 |
+
activation=leaky
|
152 |
+
|
153 |
+
[upsample]
|
154 |
+
stride=2
|
155 |
+
|
156 |
+
[route]
|
157 |
+
layers = -1, 8
|
158 |
+
|
159 |
+
[convolutional]
|
160 |
+
batch_normalize=1
|
161 |
+
filters=256
|
162 |
+
size=3
|
163 |
+
stride=1
|
164 |
+
pad=1
|
165 |
+
activation=leaky
|
166 |
+
|
167 |
+
[convolutional]
|
168 |
+
size=1
|
169 |
+
stride=1
|
170 |
+
pad=1
|
171 |
+
filters=255
|
172 |
+
activation=linear
|
173 |
+
|
174 |
+
[yolo]
|
175 |
+
mask = 0,1,2
|
176 |
+
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
177 |
+
classes=80
|
178 |
+
num=6
|
179 |
+
jitter=.3
|
180 |
+
ignore_thresh = .7
|
181 |
+
truth_thresh = 1
|
182 |
+
random=1
|
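A back-of-the-envelope check (not part of the commit): at the 416x416 resolution model.py configures, the two [yolo] heads above (strides 32 and 16, three anchors each) emit the following number of candidate boxes before confidence thresholding and NMS.

inp_dim = 416
heads = [(32, 3), (16, 3)]          # (stride, anchors per cell) for the two [yolo] layers above
total = sum((inp_dim // s) ** 2 * a for s, a in heads)
print(total)                        # 13*13*3 + 26*26*3 = 2535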
yolo/yolov3-tiny.weights
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dccea06f59b781ec1234ddf8d1e94b9519a97f4245748a7d4db75d5b7080a42c
|
3 |
+
size 35434956
|
yolo/yolov3.cfg
ADDED
@@ -0,0 +1,788 @@
1 |
+
[net]
|
2 |
+
# Testing
|
3 |
+
# batch=1
|
4 |
+
# subdivisions=1
|
5 |
+
# Training
|
6 |
+
batch=64
|
7 |
+
subdivisions=16
|
8 |
+
width=624
|
9 |
+
height=624
|
10 |
+
channels=3
|
11 |
+
momentum=0.9
|
12 |
+
decay=0.0005
|
13 |
+
angle=0
|
14 |
+
saturation = 1.5
|
15 |
+
exposure = 1.5
|
16 |
+
hue=.1
|
17 |
+
|
18 |
+
learning_rate=0.001
|
19 |
+
burn_in=1000
|
20 |
+
max_batches = 500200
|
21 |
+
policy=steps
|
22 |
+
steps=400000,450000
|
23 |
+
scales=.1,.1
|
24 |
+
|
25 |
+
[convolutional]
|
26 |
+
batch_normalize=1
|
27 |
+
filters=32
|
28 |
+
size=3
|
29 |
+
stride=1
|
30 |
+
pad=1
|
31 |
+
activation=leaky
|
32 |
+
|
33 |
+
# Downsample
|
34 |
+
|
35 |
+
[convolutional]
|
36 |
+
batch_normalize=1
|
37 |
+
filters=64
|
38 |
+
size=3
|
39 |
+
stride=2
|
40 |
+
pad=1
|
41 |
+
activation=leaky
|
42 |
+
|
43 |
+
[convolutional]
|
44 |
+
batch_normalize=1
|
45 |
+
filters=32
|
46 |
+
size=1
|
47 |
+
stride=1
|
48 |
+
pad=1
|
49 |
+
activation=leaky
|
50 |
+
|
51 |
+
[convolutional]
|
52 |
+
batch_normalize=1
|
53 |
+
filters=64
|
54 |
+
size=3
|
55 |
+
stride=1
|
56 |
+
pad=1
|
57 |
+
activation=leaky
|
58 |
+
|
59 |
+
[shortcut]
|
60 |
+
from=-3
|
61 |
+
activation=linear
|
62 |
+
|
63 |
+
# Downsample
|
64 |
+
|
65 |
+
[convolutional]
|
66 |
+
batch_normalize=1
|
67 |
+
filters=128
|
68 |
+
size=3
|
69 |
+
stride=2
|
70 |
+
pad=1
|
71 |
+
activation=leaky
|
72 |
+
|
73 |
+
[convolutional]
|
74 |
+
batch_normalize=1
|
75 |
+
filters=64
|
76 |
+
size=1
|
77 |
+
stride=1
|
78 |
+
pad=1
|
79 |
+
activation=leaky
|
80 |
+
|
81 |
+
[convolutional]
|
82 |
+
batch_normalize=1
|
83 |
+
filters=128
|
84 |
+
size=3
|
85 |
+
stride=1
|
86 |
+
pad=1
|
87 |
+
activation=leaky
|
88 |
+
|
89 |
+
[shortcut]
|
90 |
+
from=-3
|
91 |
+
activation=linear
|
92 |
+
|
93 |
+
[convolutional]
|
94 |
+
batch_normalize=1
|
95 |
+
filters=64
|
96 |
+
size=1
|
97 |
+
stride=1
|
98 |
+
pad=1
|
99 |
+
activation=leaky
|
100 |
+
|
101 |
+
[convolutional]
|
102 |
+
batch_normalize=1
|
103 |
+
filters=128
|
104 |
+
size=3
|
105 |
+
stride=1
|
106 |
+
pad=1
|
107 |
+
activation=leaky
|
108 |
+
|
109 |
+
[shortcut]
|
110 |
+
from=-3
|
111 |
+
activation=linear
|
112 |
+
|
113 |
+
# Downsample
|
114 |
+
|
115 |
+
[convolutional]
|
116 |
+
batch_normalize=1
|
117 |
+
filters=256
|
118 |
+
size=3
|
119 |
+
stride=2
|
120 |
+
pad=1
|
121 |
+
activation=leaky
|
122 |
+
|
123 |
+
[convolutional]
|
124 |
+
batch_normalize=1
|
125 |
+
filters=128
|
126 |
+
size=1
|
127 |
+
stride=1
|
128 |
+
pad=1
|
129 |
+
activation=leaky
|
130 |
+
|
131 |
+
[convolutional]
|
132 |
+
batch_normalize=1
|
133 |
+
filters=256
|
134 |
+
size=3
|
135 |
+
stride=1
|
136 |
+
pad=1
|
137 |
+
activation=leaky
|
138 |
+
|
139 |
+
[shortcut]
|
140 |
+
from=-3
|
141 |
+
activation=linear
|
142 |
+
|
143 |
+
[convolutional]
|
144 |
+
batch_normalize=1
|
145 |
+
filters=128
|
146 |
+
size=1
|
147 |
+
stride=1
|
148 |
+
pad=1
|
149 |
+
activation=leaky
|
150 |
+
|
151 |
+
[convolutional]
|
152 |
+
batch_normalize=1
|
153 |
+
filters=256
|
154 |
+
size=3
|
155 |
+
stride=1
|
156 |
+
pad=1
|
157 |
+
activation=leaky
|
158 |
+
|
159 |
+
[shortcut]
|
160 |
+
from=-3
|
161 |
+
activation=linear
|
162 |
+
|
163 |
+
[convolutional]
|
164 |
+
batch_normalize=1
|
165 |
+
filters=128
|
166 |
+
size=1
|
167 |
+
stride=1
|
168 |
+
pad=1
|
169 |
+
activation=leaky
|
170 |
+
|
171 |
+
[convolutional]
|
172 |
+
batch_normalize=1
|
173 |
+
filters=256
|
174 |
+
size=3
|
175 |
+
stride=1
|
176 |
+
pad=1
|
177 |
+
activation=leaky
|
178 |
+
|
179 |
+
[shortcut]
|
180 |
+
from=-3
|
181 |
+
activation=linear
|
182 |
+
|
183 |
+
[convolutional]
|
184 |
+
batch_normalize=1
|
185 |
+
filters=128
|
186 |
+
size=1
|
187 |
+
stride=1
|
188 |
+
pad=1
|
189 |
+
activation=leaky
|
190 |
+
|
191 |
+
[convolutional]
|
192 |
+
batch_normalize=1
|
193 |
+
filters=256
|
194 |
+
size=3
|
195 |
+
stride=1
|
196 |
+
pad=1
|
197 |
+
activation=leaky
|
198 |
+
|
199 |
+
[shortcut]
|
200 |
+
from=-3
|
201 |
+
activation=linear
|
202 |
+
|
203 |
+
|
204 |
+
[convolutional]
|
205 |
+
batch_normalize=1
|
206 |
+
filters=128
|
207 |
+
size=1
|
208 |
+
stride=1
|
209 |
+
pad=1
|
210 |
+
activation=leaky
|
211 |
+
|
212 |
+
[convolutional]
|
213 |
+
batch_normalize=1
|
214 |
+
filters=256
|
215 |
+
size=3
|
216 |
+
stride=1
|
217 |
+
pad=1
|
218 |
+
activation=leaky
|
219 |
+
|
220 |
+
[shortcut]
|
221 |
+
from=-3
|
222 |
+
activation=linear
|
223 |
+
|
224 |
+
[convolutional]
|
225 |
+
batch_normalize=1
|
226 |
+
filters=128
|
227 |
+
size=1
|
228 |
+
stride=1
|
229 |
+
pad=1
|
230 |
+
activation=leaky
|
231 |
+
|
232 |
+
[convolutional]
|
233 |
+
batch_normalize=1
|
234 |
+
filters=256
|
235 |
+
size=3
|
236 |
+
stride=1
|
237 |
+
pad=1
|
238 |
+
activation=leaky
|
239 |
+
|
240 |
+
[shortcut]
|
241 |
+
from=-3
|
242 |
+
activation=linear
|
243 |
+
|
244 |
+
[convolutional]
|
245 |
+
batch_normalize=1
|
246 |
+
filters=128
|
247 |
+
size=1
|
248 |
+
stride=1
|
249 |
+
pad=1
|
250 |
+
activation=leaky
|
251 |
+
|
252 |
+
[convolutional]
|
253 |
+
batch_normalize=1
|
254 |
+
filters=256
|
255 |
+
size=3
|
256 |
+
stride=1
|
257 |
+
pad=1
|
258 |
+
activation=leaky
|
259 |
+
|
260 |
+
[shortcut]
|
261 |
+
from=-3
|
262 |
+
activation=linear
|
263 |
+
|
264 |
+
[convolutional]
|
265 |
+
batch_normalize=1
|
266 |
+
filters=128
|
267 |
+
size=1
|
268 |
+
stride=1
|
269 |
+
pad=1
|
270 |
+
activation=leaky
|
271 |
+
|
272 |
+
[convolutional]
|
273 |
+
batch_normalize=1
|
274 |
+
filters=256
|
275 |
+
size=3
|
276 |
+
stride=1
|
277 |
+
pad=1
|
278 |
+
activation=leaky
|
279 |
+
|
280 |
+
[shortcut]
|
281 |
+
from=-3
|
282 |
+
activation=linear
|
283 |
+
|
284 |
+
# Downsample
|
285 |
+
|
286 |
+
[convolutional]
|
287 |
+
batch_normalize=1
|
288 |
+
filters=512
|
289 |
+
size=3
|
290 |
+
stride=2
|
291 |
+
pad=1
|
292 |
+
activation=leaky
|
293 |
+
|
294 |
+
[convolutional]
|
295 |
+
batch_normalize=1
|
296 |
+
filters=256
|
297 |
+
size=1
|
298 |
+
stride=1
|
299 |
+
pad=1
|
300 |
+
activation=leaky
|
301 |
+
|
302 |
+
[convolutional]
|
303 |
+
batch_normalize=1
|
304 |
+
filters=512
|
305 |
+
size=3
|
306 |
+
stride=1
|
307 |
+
pad=1
|
308 |
+
activation=leaky
|
309 |
+
|
310 |
+
[shortcut]
|
311 |
+
from=-3
|
312 |
+
activation=linear
|
313 |
+
|
314 |
+
|
315 |
+
[convolutional]
|
316 |
+
batch_normalize=1
|
317 |
+
filters=256
|
318 |
+
size=1
|
319 |
+
stride=1
|
320 |
+
pad=1
|
321 |
+
activation=leaky
|
322 |
+
|
323 |
+
[convolutional]
|
324 |
+
batch_normalize=1
|
325 |
+
filters=512
|
326 |
+
size=3
|
327 |
+
stride=1
|
328 |
+
pad=1
|
329 |
+
activation=leaky
|
330 |
+
|
331 |
+
[shortcut]
|
332 |
+
from=-3
|
333 |
+
activation=linear
|
334 |
+
|
335 |
+
|
336 |
+
[convolutional]
|
337 |
+
batch_normalize=1
|
338 |
+
filters=256
|
339 |
+
size=1
|
340 |
+
stride=1
|
341 |
+
pad=1
|
342 |
+
activation=leaky
|
343 |
+
|
344 |
+
[convolutional]
|
345 |
+
batch_normalize=1
|
346 |
+
filters=512
|
347 |
+
size=3
|
348 |
+
stride=1
|
349 |
+
pad=1
|
350 |
+
activation=leaky
|
351 |
+
|
352 |
+
[shortcut]
|
353 |
+
from=-3
|
354 |
+
activation=linear
|
355 |
+
|
356 |
+
|
357 |
+
[convolutional]
|
358 |
+
batch_normalize=1
|
359 |
+
filters=256
|
360 |
+
size=1
|
361 |
+
stride=1
|
362 |
+
pad=1
|
363 |
+
activation=leaky
|
364 |
+
|
365 |
+
[convolutional]
|
366 |
+
batch_normalize=1
|
367 |
+
filters=512
|
368 |
+
size=3
|
369 |
+
stride=1
|
370 |
+
pad=1
|
371 |
+
activation=leaky
|
372 |
+
|
373 |
+
[shortcut]
|
374 |
+
from=-3
|
375 |
+
activation=linear
|
376 |
+
|
377 |
+
[convolutional]
|
378 |
+
batch_normalize=1
|
379 |
+
filters=256
|
380 |
+
size=1
|
381 |
+
stride=1
|
382 |
+
pad=1
|
383 |
+
activation=leaky
|
384 |
+
|
385 |
+
[convolutional]
|
386 |
+
batch_normalize=1
|
387 |
+
filters=512
|
388 |
+
size=3
|
389 |
+
stride=1
|
390 |
+
pad=1
|
391 |
+
activation=leaky
|
392 |
+
|
393 |
+
[shortcut]
|
394 |
+
from=-3
|
395 |
+
activation=linear
|
396 |
+
|
397 |
+
|
398 |
+
[convolutional]
|
399 |
+
batch_normalize=1
|
400 |
+
filters=256
|
401 |
+
size=1
|
402 |
+
stride=1
|
403 |
+
pad=1
|
404 |
+
activation=leaky
|
405 |
+
|
406 |
+
[convolutional]
|
407 |
+
batch_normalize=1
|
408 |
+
filters=512
|
409 |
+
size=3
|
410 |
+
stride=1
|
411 |
+
pad=1
|
412 |
+
activation=leaky
|
413 |
+
|
414 |
+
[shortcut]
|
415 |
+
from=-3
|
416 |
+
activation=linear
|
417 |
+
|
418 |
+
|
419 |
+
[convolutional]
|
420 |
+
batch_normalize=1
|
421 |
+
filters=256
|
422 |
+
size=1
|
423 |
+
stride=1
|
424 |
+
pad=1
|
425 |
+
activation=leaky
|
426 |
+
|
427 |
+
[convolutional]
|
428 |
+
batch_normalize=1
|
429 |
+
filters=512
|
430 |
+
size=3
|
431 |
+
stride=1
|
432 |
+
pad=1
|
433 |
+
activation=leaky
|
434 |
+
|
435 |
+
[shortcut]
|
436 |
+
from=-3
|
437 |
+
activation=linear
|
438 |
+
|
439 |
+
[convolutional]
|
440 |
+
batch_normalize=1
|
441 |
+
filters=256
|
442 |
+
size=1
|
443 |
+
stride=1
|
444 |
+
pad=1
|
445 |
+
activation=leaky
|
446 |
+
|
447 |
+
[convolutional]
|
448 |
+
batch_normalize=1
|
449 |
+
filters=512
|
450 |
+
size=3
|
451 |
+
stride=1
|
452 |
+
pad=1
|
453 |
+
activation=leaky
|
454 |
+
|
455 |
+
[shortcut]
|
456 |
+
from=-3
|
457 |
+
activation=linear
|
458 |
+
|
459 |
+
# Downsample
|
460 |
+
|
461 |
+
[convolutional]
|
462 |
+
batch_normalize=1
|
463 |
+
filters=1024
|
464 |
+
size=3
|
465 |
+
stride=2
|
466 |
+
pad=1
|
467 |
+
activation=leaky
|
468 |
+
|
469 |
+
[convolutional]
|
470 |
+
batch_normalize=1
|
471 |
+
filters=512
|
472 |
+
size=1
|
473 |
+
stride=1
|
474 |
+
pad=1
|
475 |
+
activation=leaky
|
476 |
+
|
477 |
+
[convolutional]
|
478 |
+
batch_normalize=1
|
479 |
+
filters=1024
|
480 |
+
size=3
|
481 |
+
stride=1
|
482 |
+
pad=1
|
483 |
+
activation=leaky
|
484 |
+
|
485 |
+
[shortcut]
|
486 |
+
from=-3
|
487 |
+
activation=linear
|
488 |
+
|
489 |
+
[convolutional]
|
490 |
+
batch_normalize=1
|
491 |
+
filters=512
|
492 |
+
size=1
|
493 |
+
stride=1
|
494 |
+
pad=1
|
495 |
+
activation=leaky
|
496 |
+
|
497 |
+
[convolutional]
|
498 |
+
batch_normalize=1
|
499 |
+
filters=1024
|
500 |
+
size=3
|
501 |
+
stride=1
|
502 |
+
pad=1
|
503 |
+
activation=leaky
|
504 |
+
|
505 |
+
[shortcut]
|
506 |
+
from=-3
|
507 |
+
activation=linear
|
508 |
+
|
509 |
+
[convolutional]
|
510 |
+
batch_normalize=1
|
511 |
+
filters=512
|
512 |
+
size=1
|
513 |
+
stride=1
|
514 |
+
pad=1
|
515 |
+
activation=leaky
|
516 |
+
|
517 |
+
[convolutional]
|
518 |
+
batch_normalize=1
|
519 |
+
filters=1024
|
520 |
+
size=3
|
521 |
+
stride=1
|
522 |
+
pad=1
|
523 |
+
activation=leaky
|
524 |
+
|
525 |
+
[shortcut]
|
526 |
+
from=-3
|
527 |
+
activation=linear
|
528 |
+
|
529 |
+
[convolutional]
|
530 |
+
batch_normalize=1
|
531 |
+
filters=512
|
532 |
+
size=1
|
533 |
+
stride=1
|
534 |
+
pad=1
|
535 |
+
activation=leaky
|
536 |
+
|
537 |
+
[convolutional]
|
538 |
+
batch_normalize=1
|
539 |
+
filters=1024
|
540 |
+
size=3
|
541 |
+
stride=1
|
542 |
+
pad=1
|
543 |
+
activation=leaky
|
544 |
+
|
545 |
+
[shortcut]
|
546 |
+
from=-3
|
547 |
+
activation=linear
|
548 |
+
|
549 |
+
######################
|
550 |
+
|
551 |
+
[convolutional]
|
552 |
+
batch_normalize=1
|
553 |
+
filters=512
|
554 |
+
size=1
|
555 |
+
stride=1
|
556 |
+
pad=1
|
557 |
+
activation=leaky
|
558 |
+
|
559 |
+
[convolutional]
|
560 |
+
batch_normalize=1
|
561 |
+
size=3
|
562 |
+
stride=1
|
563 |
+
pad=1
|
564 |
+
filters=1024
|
565 |
+
activation=leaky
|
566 |
+
|
567 |
+
[convolutional]
|
568 |
+
batch_normalize=1
|
569 |
+
filters=512
|
570 |
+
size=1
|
571 |
+
stride=1
|
572 |
+
pad=1
|
573 |
+
activation=leaky
|
574 |
+
|
575 |
+
[convolutional]
|
576 |
+
batch_normalize=1
|
577 |
+
size=3
|
578 |
+
stride=1
|
579 |
+
pad=1
|
580 |
+
filters=1024
|
581 |
+
activation=leaky
|
582 |
+
|
583 |
+
[convolutional]
|
584 |
+
batch_normalize=1
|
585 |
+
filters=512
|
586 |
+
size=1
|
587 |
+
stride=1
|
588 |
+
pad=1
|
589 |
+
activation=leaky
|
590 |
+
|
591 |
+
[convolutional]
|
592 |
+
batch_normalize=1
|
593 |
+
size=3
|
594 |
+
stride=1
|
595 |
+
pad=1
|
596 |
+
filters=1024
|
597 |
+
activation=leaky
|
598 |
+
|
599 |
+
[convolutional]
|
600 |
+
size=1
|
601 |
+
stride=1
|
602 |
+
pad=1
|
603 |
+
filters=255
|
604 |
+
activation=linear
|
605 |
+
|
606 |
+
|
607 |
+
[yolo]
|
608 |
+
mask = 6,7,8
|
609 |
+
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
|
610 |
+
classes=80
|
611 |
+
num=9
|
612 |
+
jitter=.3
|
613 |
+
ignore_thresh = .7
|
614 |
+
truth_thresh = 1
|
615 |
+
random=1
|
616 |
+
|
617 |
+
|
618 |
+
[route]
|
619 |
+
layers = -4
|
620 |
+
|
621 |
+
[convolutional]
|
622 |
+
batch_normalize=1
|
623 |
+
filters=256
|
624 |
+
size=1
|
625 |
+
stride=1
|
626 |
+
pad=1
|
627 |
+
activation=leaky
|
628 |
+
|
629 |
+
[upsample]
|
630 |
+
stride=2
|
631 |
+
|
632 |
+
[route]
|
633 |
+
layers = -1, 61
|
634 |
+
|
635 |
+
|
636 |
+
|
637 |
+
[convolutional]
|
638 |
+
batch_normalize=1
|
639 |
+
filters=256
|
640 |
+
size=1
|
641 |
+
stride=1
|
642 |
+
pad=1
|
643 |
+
activation=leaky
|
644 |
+
|
645 |
+
[convolutional]
|
646 |
+
batch_normalize=1
|
647 |
+
size=3
|
648 |
+
stride=1
|
649 |
+
pad=1
|
650 |
+
filters=512
|
651 |
+
activation=leaky
|
652 |
+
|
653 |
+
[convolutional]
|
654 |
+
batch_normalize=1
|
655 |
+
filters=256
|
656 |
+
size=1
|
657 |
+
stride=1
|
658 |
+
pad=1
|
659 |
+
activation=leaky
|
660 |
+
|
661 |
+
[convolutional]
|
662 |
+
batch_normalize=1
|
663 |
+
size=3
|
664 |
+
stride=1
|
665 |
+
pad=1
|
666 |
+
filters=512
|
667 |
+
activation=leaky
|
668 |
+
|
669 |
+
[convolutional]
|
670 |
+
batch_normalize=1
|
671 |
+
filters=256
|
672 |
+
size=1
|
673 |
+
stride=1
|
674 |
+
pad=1
|
675 |
+
activation=leaky
|
676 |
+
|
677 |
+
[convolutional]
|
678 |
+
batch_normalize=1
|
679 |
+
size=3
|
680 |
+
stride=1
|
681 |
+
pad=1
|
682 |
+
filters=512
|
683 |
+
activation=leaky
|
684 |
+
|
685 |
+
[convolutional]
|
686 |
+
size=1
|
687 |
+
stride=1
|
688 |
+
pad=1
|
689 |
+
filters=255
|
690 |
+
activation=linear
|
691 |
+
|
692 |
+
|
693 |
+
[yolo]
|
694 |
+
mask = 3,4,5
|
695 |
+
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
|
696 |
+
classes=80
|
697 |
+
num=9
|
698 |
+
jitter=.3
|
699 |
+
ignore_thresh = .7
|
700 |
+
truth_thresh = 1
|
701 |
+
random=1
|
702 |
+
|
703 |
+
|
704 |
+
|
705 |
+
[route]
|
706 |
+
layers = -4
|
707 |
+
|
708 |
+
[convolutional]
|
709 |
+
batch_normalize=1
|
710 |
+
filters=128
|
711 |
+
size=1
|
712 |
+
stride=1
|
713 |
+
pad=1
|
714 |
+
activation=leaky
|
715 |
+
|
716 |
+
[upsample]
|
717 |
+
stride=2
|
718 |
+
|
719 |
+
[route]
|
720 |
+
layers = -1, 36
|
721 |
+
|
722 |
+
|
723 |
+
|
724 |
+
[convolutional]
|
725 |
+
batch_normalize=1
|
726 |
+
filters=128
|
727 |
+
size=1
|
728 |
+
stride=1
|
729 |
+
pad=1
|
730 |
+
activation=leaky
|
731 |
+
|
732 |
+
[convolutional]
|
733 |
+
batch_normalize=1
|
734 |
+
size=3
|
735 |
+
stride=1
|
736 |
+
pad=1
|
737 |
+
filters=256
|
738 |
+
activation=leaky
|
739 |
+
|
740 |
+
[convolutional]
|
741 |
+
batch_normalize=1
|
742 |
+
filters=128
|
743 |
+
size=1
|
744 |
+
stride=1
|
745 |
+
pad=1
|
746 |
+
activation=leaky
|
747 |
+
|
748 |
+
[convolutional]
|
749 |
+
batch_normalize=1
|
750 |
+
size=3
|
751 |
+
stride=1
|
752 |
+
pad=1
|
753 |
+
filters=256
|
754 |
+
activation=leaky
|
755 |
+
|
756 |
+
[convolutional]
|
757 |
+
batch_normalize=1
|
758 |
+
filters=128
|
759 |
+
size=1
|
760 |
+
stride=1
|
761 |
+
pad=1
|
762 |
+
activation=leaky
|
763 |
+
|
764 |
+
[convolutional]
|
765 |
+
batch_normalize=1
|
766 |
+
size=3
|
767 |
+
stride=1
|
768 |
+
pad=1
|
769 |
+
filters=256
|
770 |
+
activation=leaky
|
771 |
+
|
772 |
+
[convolutional]
|
773 |
+
size=1
|
774 |
+
stride=1
|
775 |
+
pad=1
|
776 |
+
filters=255
|
777 |
+
activation=linear
|
778 |
+
|
779 |
+
|
780 |
+
[yolo]
|
781 |
+
mask = 0,1,2
|
782 |
+
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
|
783 |
+
classes=80
|
784 |
+
num=9
|
785 |
+
jitter=.3
|
786 |
+
ignore_thresh = .7
|
787 |
+
truth_thresh = 1
|
788 |
+
random=1
|