Skip to content

Commit 3fd2bf9

Browse files
committed
allow evaluating forest at lower resolution for faster framerate
1 parent 74aa84e commit 3fd2bf9

File tree

4 files changed

+90
-49
lines changed

4 files changed

+90
-49
lines changed

src/3d_bz.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ def __init__(self):
4949

5050
self.midi = Midi()
5151

52+
self.LABELS_REDUCE = 2
53+
5254
self.NO_DEBUG = args.no_debug
5355

5456
self.NUM_RANDOM_GUESSES = args.plane_num_iterations or 25000
@@ -67,7 +69,7 @@ def __init__(self):
6769

6870
print('loading forest')
6971

70-
self.layered_rdf = LayeredDecisionForest.load(args.cfg, (480, 848))
72+
self.layered_rdf = LayeredDecisionForest.load(args.cfg, (480, 848), labels_reduce = self.LABELS_REDUCE)
7173

7274
self.points_ops = PointsOps()
7375

@@ -94,11 +96,15 @@ def __init__(self):
9496
self.coord_croups_cpu = np.zeros((self.depth_mm_dims[0] * self.depth_mm_dims[1], 3), dtype=np.int32)
9597
self.coord_groups_gpu = GpuBuffer((self.depth_mm_dims[0] * self.depth_mm_dims[1], 3), dtype=np.int32)
9698

97-
self.labels_image = GpuBuffer((self.DIM_Y, self.DIM_X), dtype=np.uint16)
98-
self.labels_image_2 = GpuBuffer((self.DIM_Y, self.DIM_X), dtype=np.uint16)
99-
self.labels_image_rgba_cpu = np.zeros((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
100-
self.labels_image_rgba = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
101-
self.labels_image_rgba_tex = GpuTexture((self.DIM_X, self.DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))
99+
self.LABELS_DIM_X = self.DIM_X // self.LABELS_REDUCE
100+
self.LABELS_DIM_Y = self.DIM_Y // self.LABELS_REDUCE
101+
self.LABELS_DIM = (self.LABELS_DIM_Y, self.LABELS_DIM_X)
102+
103+
self.labels_image = GpuBuffer(self.LABELS_DIM, dtype=np.uint16)
104+
self.labels_image_2 = GpuBuffer(self.LABELS_DIM, dtype=np.uint16)
105+
self.labels_image_rgba_cpu = np.zeros(self.LABELS_DIM + (4,), dtype=np.uint8)
106+
self.labels_image_rgba = GpuBuffer(self.LABELS_DIM + (4,), dtype=np.uint8)
107+
self.labels_image_rgba_tex = GpuTexture((self.LABELS_DIM_X, self.LABELS_DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))
102108

103109
mean_shift_variances = np.array(
104110
[100., 50., 50., 50., 50., 50., 50.],
@@ -296,7 +302,7 @@ def tick(self, _):
296302
window_pad = 24 * self.dpi_scale
297303

298304
imgui.push_style_var(imgui.STYLE_WINDOW_PADDING, (window_pad, window_pad))
299-
imgui.set_next_window_position(0, 0)#self.DIM_Y * self.dpi_scale)
305+
imgui.set_next_window_position(0, 0)
300306
imgui.set_next_window_size(self.width * self.dpi_scale, 220 * self.dpi_scale)
301307
imgui.set_next_window_bg_alpha(0.3)
302308
imgui.begin('Hand state', flags= imgui.WINDOW_NO_MOVE | imgui.WINDOW_NO_RESIZE | imgui.WINDOW_NO_TITLE_BAR | imgui.WINDOW_NO_SCROLLBAR)
@@ -422,28 +428,28 @@ def run_per_hand_pipeline(self, g_id, flip_x):
422428
if flip_x:
423429
self.labels_image_2.cu().set(self.labels_image.cu())
424430
self.points_ops.flip_x(
425-
np.array([self.DIM_X, self.DIM_Y], dtype=np.int32),
431+
np.array([self.LABELS_DIM_X, self.LABELS_DIM_Y], dtype=np.int32),
426432
self.labels_image_2.cu(),
427433
self.labels_image.cu(),
428-
grid=make_grid((self.DIM_X, self.DIM_Y, 1), (32, 32, 1)),
434+
grid=make_grid((self.LABELS_DIM_X, self.LABELS_DIM_Y, 1), (32, 32, 1)),
429435
block=(32, 32, 1))
430436

431437
self.points_ops.make_rgba_from_labels(
432-
np.uint32(self.DIM_X),
433-
np.uint32(self.DIM_Y),
438+
np.uint32(self.LABELS_DIM_X),
439+
np.uint32(self.LABELS_DIM_Y),
434440
np.uint32(self.layered_rdf.num_layered_classes),
435441
self.labels_image.cu(),
436442
self.layered_rdf.label_colors.cu(),
437443
self.labels_image_rgba.cu(),
438-
grid = ((self.DIM_X // 32) + 1, (self.DIM_Y // 32) + 1, 1),
444+
grid = ((self.LABELS_DIM_X // 32) + 1, (self.LABELS_DIM_Y // 32) + 1, 1),
439445
block = (32,32,1))
440446

441447
# self.cu_ctx.synchronize()
442448
# self.t.record('--mean shift')
443449

444450
label_means = self.mean_shift.run(
445451
self.mean_shift_rounds,
446-
self.labels_image.cu().reshape((1, self.DIM_Y, self.DIM_X)),
452+
self.labels_image.cu().reshape((1, self.LABELS_DIM_Y, self.LABELS_DIM_X)),
447453
self.layered_rdf.num_layered_classes,
448454
self.mean_shift_variances)
449455

@@ -486,6 +492,8 @@ def run_per_hand_pipeline(self, g_id, flip_x):
486492
for i, f_idx in zip(range(len(self.fingertip_idxes)), self.fingertip_idxes):
487493

488494
px, py = label_means[f_idx-1].astype(np.int32)
495+
px *= self.LABELS_REDUCE
496+
py *= self.LABELS_REDUCE
489497
if px < 0 or py < 0 or px >= self.DIM_X or py >= self.DIM_Y:
490498
hand_state.fingertips[i].reset_positions()
491499
else:

src/cuda/tree_eval.cu

+18-14
Original file line numberDiff line numberDiff line change
@@ -25,23 +25,24 @@ extern "C" {__global__
2525
void evaluate_image_using_forest(
2626
int NUM_TREES,
2727
int NUM_IMAGES,
28-
int IMG_DIM_X,
29-
int IMG_DIM_Y,
28+
int depth_dim_x,
29+
int depth_dim_y,
3030
int NUM_CLASSES,
3131
int MAX_TREE_DEPTH,
3232
int BLOCK_DIM_X,
3333
uint16* _img_in,
3434
int filter_class,
3535
uint16* _filter,
3636
float* _forest,
37-
uint16* _labels_out)
37+
uint16* _labels_out,
38+
int labels_reduce)
3839
{
3940

4041
extern __shared__ float _thread_pdf[];
4142
Array2d<float> thread_pdf(_thread_pdf, {BLOCK_DIM_X, NUM_CLASSES});
4243

43-
const int2 IMG_DIM{IMG_DIM_X, IMG_DIM_Y};
44-
const int TOTAL_NUM_PIXELS = NUM_IMAGES * IMG_DIM.x * IMG_DIM.y;
44+
const int2 labels_img_dim{depth_dim_x / labels_reduce, depth_dim_y / labels_reduce};
45+
const int TOTAL_NUM_PIXELS = NUM_IMAGES * labels_img_dim.x * labels_img_dim.y;
4546
const int TREE_NODE_ELS = 7 + NUM_CLASSES + NUM_CLASSES; // (ux,uy,vx,vy,thresh,l_next,r_next,{l_pdf},{r_pdf})
4647

4748
const int i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -59,13 +60,16 @@ extern "C" {__global__
5960

6061
__syncthreads();
6162

62-
const int img_idx = i / (IMG_DIM.x * IMG_DIM.y);
63-
const int i_rem = i % (IMG_DIM.x * IMG_DIM.y);
64-
const int img_y = i_rem / IMG_DIM.x;
65-
const int img_x = i_rem % IMG_DIM.x;
63+
const int img_idx = i / (labels_img_dim.x * labels_img_dim.y);
64+
const int i_rem = i % (labels_img_dim.x * labels_img_dim.y);
65+
const int img_y = i_rem / labels_img_dim.x;
66+
const int img_x = i_rem % labels_img_dim.x;
6667

67-
Array3d<uint16> img_in(_img_in, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X}, MAX_UINT16);
68-
Array3d<uint16> labels_out(_labels_out, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X});
68+
const int depth_img_y = img_y * labels_reduce;
69+
const int depth_img_x = img_x * labels_reduce;
70+
71+
Array3d<uint16> img_in(_img_in, {NUM_IMAGES,depth_dim_y,depth_dim_x}, MAX_UINT16);
72+
Array3d<uint16> labels_out(_labels_out, {NUM_IMAGES,labels_img_dim.y,labels_img_dim.x});
6973

7074
const int TOTAL_TREE_NODES = (1 << MAX_TREE_DEPTH) - 1;
7175

@@ -74,13 +78,13 @@ extern "C" {__global__
7478

7579
// When filtering by a filter image, skip pixels whose filter label does not match filter_class.
7680
if (filter_class != -1) {
77-
Array3d<uint16> filter(_filter, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X}, MAX_UINT16);
81+
Array3d<uint16> filter(_filter, {NUM_IMAGES,labels_img_dim.y,labels_img_dim.x}, MAX_UINT16);
7882
const uint16 img_label = filter.get({img_idx, img_y, img_x});
7983
if ((int)img_label != filter_class) { return; }
8084
}
8185

8286
// Don't try to evaluate if the sampled input depth pixel has a value of 0!
83-
const uint16 img_d = img_in.get({img_idx, img_y, img_x});
87+
const uint16 img_d = img_in.get({img_idx, depth_img_y, depth_img_x});
8488
if (img_d == 0 || img_d == MAX_UINT16) { return; } // max uint16 is also considered 'pixel not present'
8589

8690
// current node ID
@@ -96,7 +100,7 @@ extern "C" {__global__
96100
const int l_next = __float2int_rd(d_ptr[5]);
97101
const int r_next = __float2int_rd(d_ptr[6]);
98102

99-
const float f = compute_feature(img_in, img_idx, int2{img_x, img_y}, u, v);
103+
const float f = compute_feature(img_in, img_idx, int2{depth_img_x, depth_img_y}, u, v);
100104
float* final_pdf = nullptr;
101105

102106
if (f < thresh) {

src/decision_tree.py

+25-15
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,21 @@ def __init__(self, num_trees, max_depth, num_classes):
170170
# comes with gpu memory
171171
class LayeredDecisionForest():
172172
@staticmethod
173-
def load(config_filename, eval_dims):
173+
def load(config_filename, depth_dims, labels_reduce=1):
174174
cfg = json.loads(open(config_filename).read())
175175
# models are loaded 1-by-1 from paths with parent directory as a root
176176
cfg['root'] = os.path.join(*Path(config_filename).parts[0:-1])
177-
return LayeredDecisionForest(cfg, eval_dims)
177+
return LayeredDecisionForest(cfg, depth_dims, labels_reduce)
178178

179-
def __init__(self, cfg, eval_dims):
179+
def __init__(self, cfg, depth_dims, labels_reduce):
180180

181181
self.eval = DecisionTreeEvaluator()
182-
self.eval_dims = eval_dims # y,x !!
182+
183+
self.depth_dims = depth_dims # y,x !!
184+
185+
self.labels_reduce = labels_reduce
186+
self.labels_dims = (depth_dims[0] // labels_reduce, depth_dims[1] // labels_reduce)
187+
183188
self.m = []
184189
for l in cfg['layers']:
185190
# model path is relative to config file itself
@@ -195,7 +200,7 @@ def __init__(self, cfg, eval_dims):
195200

196201
self.num_models = len(self.m)
197202

198-
self.label_images = [GpuBuffer(eval_dims, dtype=np.uint16) for _ in range(self.num_models)]
203+
self.label_images = [GpuBuffer(self.labels_dims, dtype=np.uint16) for _ in range(self.num_models)]
199204

200205
self.labels_images_ptrs_cu = GpuBuffer((self.num_models,), dtype=np.int64)
201206
label_images_ptrs = np.array([i.cu().__cuda_array_interface__['data'][0] for i in self.label_images], dtype=np.int64)
@@ -235,25 +240,27 @@ def run(self, depth_image, labels_image):
235240
i.cu().fill(MAX_UINT16)
236241

237242
# first dim: image id. only one image!
238-
dims = (1,) + self.eval_dims
243+
depth_img_dims = (1,) + self.depth_dims
244+
label_img_dims = (1,) + self.labels_dims
239245

240246
for i in range(self.num_models):
241247
m, filter_model, filter_model_class = self.m[i]
242248
single_labels_image = self.label_images[i]
243249

244250
self.eval.get_labels_forest(
245251
m,
246-
depth_image.cu().reshape(dims),
247-
single_labels_image.cu().reshape(dims),
248-
filter_images=self.label_images[filter_model].cu().reshape(dims) if (filter_model is not None) else None,
252+
depth_image.cu().reshape(depth_img_dims),
253+
single_labels_image.cu().reshape(label_img_dims),
254+
labels_reduce=self.labels_reduce,
255+
filter_images=self.label_images[filter_model].cu().reshape(label_img_dims) if (filter_model is not None) else None,
249256
filter_images_class=filter_model_class)
250257

251258
self.eval.make_composite_labels_image(
252259
self.labels_images_ptrs_cu.cu(),
253-
self.eval_dims[1],
254-
self.eval_dims[0],
260+
self.labels_dims[1],
261+
self.labels_dims[0],
255262
self.labels_conditions_cu.cu(),
256-
labels_image.cu().reshape(dims))
263+
labels_image.cu().reshape(label_img_dims))
257264

258265
# def eval()
259266
class DecisionTreeEvaluator():
@@ -287,14 +294,16 @@ def get_labels(self, tree, depth_images_in, labels_out):
287294

288295

289296
# TODO: support filter image for single tree forest! or not??
290-
def get_labels_forest(self, forest, depth_images_in, labels_out, filter_images=None, filter_images_class=None):
297+
def get_labels_forest(self, forest, depth_images_in, labels_out, labels_reduce = 1, filter_images=None, filter_images_class=None):
291298
num_images, dim_y, dim_x = depth_images_in.shape
292299

300+
assert labels_out.shape == (num_images, dim_y // labels_reduce, dim_x // labels_reduce)
301+
293302
if filter_images is not None:
294303
assert filter_images_class is not None
295-
assert filter_images.shape == depth_images_in.shape
304+
assert filter_images.shape == labels_out.shape
296305

297-
num_test_pixels = num_images * dim_y * dim_x
306+
num_test_pixels = num_images * (dim_y // labels_reduce) * (dim_x // labels_reduce)
298307

299308
BLOCK_DIM_X = int(MAX_THREADS_PER_BLOCK // forest.num_trees)
300309
grid_dim = (int(num_test_pixels // BLOCK_DIM_X) + 1, 1, 1)
@@ -315,6 +324,7 @@ def get_labels_forest(self, forest, depth_images_in, labels_out, filter_images=N
315324
f_img,
316325
forest.forest_cu,
317326
labels_out,
327+
np.int32(labels_reduce),
318328
grid=grid_dim, block=block_dim, shared=(BLOCK_DIM_X * forest.num_classes * 4)) # sizeof(float), right?
319329

320330

src/run_live_layered.py

+26-7
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
class RunLive_Layered(AppBase):
1818
def __init__(self):
19-
super().__init__(title="Layered RDF Demo")
19+
super().__init__(title="Layered RDF Demo", width=848, height=800)
2020

2121
parser = argparse.ArgumentParser(description='Train a classifier RDF for depth images')
2222
parser.add_argument('-cfg', nargs='?', required=True, type=str, help='Path to the layered decision forest config file')
@@ -35,16 +35,20 @@ def __init__(self):
3535

3636
self.pipeline, self.depth_intrin, self.DIM_X, self.DIM_Y, self.FOCAL, self.PP = rs_util.start_stream(args)
3737

38-
self.layered_rdf = LayeredDecisionForest.load(args.cfg, (self.DIM_Y, self.DIM_X))
38+
self.LABELS_REDUCE = 2
39+
40+
self.layered_rdf = LayeredDecisionForest.load(args.cfg, (self.DIM_Y, self.DIM_X), self.LABELS_REDUCE)
3941
self.points_ops = PointsOps()
4042

4143
self.pts = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.float32)
4244

4345
self.depth_image = GpuBuffer((1, self.DIM_Y, self.DIM_X), np.uint16)
44-
self.labels_image = GpuBuffer((1, self.DIM_Y, self.DIM_X), dtype=np.uint16)
4546

46-
self.labels_image_rgba = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
47-
self.labels_image_rgba_tex = GpuTexture((self.DIM_X, self.DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))
47+
48+
self.labels_image = GpuBuffer((1, self.DIM_Y // self.LABELS_REDUCE, self.DIM_X // self.LABELS_REDUCE), dtype=np.uint16)
49+
50+
self.labels_image_rgba = GpuBuffer((self.DIM_Y // self.LABELS_REDUCE, self.DIM_X // self.LABELS_REDUCE, 4), dtype=np.uint8)
51+
self.labels_image_rgba_tex = GpuTexture((self.DIM_X // self.LABELS_REDUCE, self.DIM_Y // self.LABELS_REDUCE), (GL_RGBA, GL_UNSIGNED_BYTE))
4852

4953
self.frame_num = 0
5054

@@ -123,8 +127,8 @@ def tick(self, t):
123127
# make RGBA image
124128
self.labels_image_rgba.cu().fill(0)
125129
self.points_ops.make_rgba_from_labels(
126-
np.uint32(self.DIM_X),
127-
np.uint32(self.DIM_Y),
130+
np.uint32(self.DIM_X // self.LABELS_REDUCE),
131+
np.uint32(self.DIM_Y // self.LABELS_REDUCE),
128132
np.uint32(self.layered_rdf.num_layered_classes),
129133
self.labels_image.cu(),
130134
self.layered_rdf.label_colors.cu(),
@@ -135,7 +139,22 @@ def tick(self, t):
135139

136140
self.frame_num += 1
137141

142+
self.begin_imgui_main()
138143
imgui.image(self.labels_image_rgba_tex.gl(), self.DIM_X * self.dpi_scale, self.DIM_Y * self.dpi_scale)
144+
imgui.end()
145+
146+
imgui.set_next_window_size(200 * self.dpi_scale, 124 * self.dpi_scale)
147+
imgui.set_next_window_bg_alpha(0.3)
148+
imgui.begin('profile', imgui.WINDOW_NO_RESIZE | imgui.WINDOW_NO_TITLE_BAR | imgui.WINDOW_NO_SCROLLBAR)
149+
profile_plot_width = 150 * self.dpi_scale
150+
profile_plot_height = 60 * self.dpi_scale
151+
imgui.text(f'ms/frame: {"{:.1f}".format(self.ms_per_frame_log[-1])}')
152+
imgui.plot_lines('##ms-frame',
153+
np.array(self.ms_per_frame_log, dtype=np.float32),
154+
scale_max=100.,
155+
scale_min=0.,
156+
graph_size=(profile_plot_width , profile_plot_height))
157+
imgui.end()
139158

140159
if __name__ == '__main__':
141160
run_app(RunLive_Layered)

0 commit comments

Comments
 (0)