26 #include "descriptor.h"
32 bool Elas::process (uint8_t* I1_,uint8_t* I2_,
float* D1,
float* D2,
const int32_t* dims){
37 bpl = width + 15-(width-1)%16;
40 I1 = (uint8_t*)_mm_malloc(bpl*height*
sizeof(uint8_t),16);
41 I2 = (uint8_t*)_mm_malloc(bpl*height*
sizeof(uint8_t),16);
42 memset (I1,0,bpl*height*
sizeof(uint8_t));
43 memset (I2,0,bpl*height*
sizeof(uint8_t));
45 memcpy(I1,I1_,bpl*height*
sizeof(uint8_t));
46 memcpy(I2,I2_,bpl*height*
sizeof(uint8_t));
48 for (int32_t v=0; v<height; v++) {
49 memcpy(I1+v*bpl,I1_+v*dims[2],width*
sizeof(uint8_t));
50 memcpy(I2+v*bpl,I2_+v*dims[2],width*
sizeof(uint8_t));
55 timer.start(
"Descriptor");
57 Descriptor desc1(I1,width,height,bpl,param.subsampling);
58 Descriptor desc2(I2,width,height,bpl,param.subsampling);
61 timer.start(
"Support Matches");
63 vector<support_pt> p_support = computeSupportMatches(desc1.I_desc,desc2.I_desc);
67 if (p_support.size()>=3)
79 timer.start(
"Delaunay Triangulation");
81 vector<triangle> tri_1 = computeDelaunayTriangulation(p_support,0);
82 vector<triangle> tri_2 = computeDelaunayTriangulation(p_support,1);
85 timer.start(
"Disparity Planes");
87 computeDisparityPlanes(p_support,tri_1,0);
88 computeDisparityPlanes(p_support,tri_2,1);
95 int32_t grid_width = (int32_t)ceil((
float)width/(float)param.grid_size);
96 int32_t grid_height = (int32_t)ceil((
float)height/(float)param.grid_size);
97 int32_t grid_dims[3] = {param.disp_max+2,grid_width,grid_height};
98 int32_t* disparity_grid_1 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,
sizeof(int32_t));
99 int32_t* disparity_grid_2 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,
sizeof(int32_t));
101 createGrid(p_support,disparity_grid_1,grid_dims,0);
102 createGrid(p_support,disparity_grid_2,grid_dims,1);
105 timer.start(
"Matching");
107 computeDisparity(p_support,tri_1,disparity_grid_1,grid_dims,desc1.I_desc,desc2.I_desc,0,D1);
108 computeDisparity(p_support,tri_2,disparity_grid_2,grid_dims,desc1.I_desc,desc2.I_desc,1,D2);
111 timer.start(
"L/R Consistency Check");
113 leftRightConsistencyCheck(D1,D2);
116 timer.start(
"Remove Small Segments");
118 removeSmallSegments(D1);
119 if (!param.postprocess_only_left)
120 removeSmallSegments(D2);
123 timer.start(
"Gap Interpolation");
125 gapInterpolation(D1);
126 if (!param.postprocess_only_left)
127 gapInterpolation(D2);
129 if (param.filter_adaptive_mean) {
131 timer.start(
"Adaptive Mean");
134 if (!param.postprocess_only_left)
138 if (param.filter_median) {
140 timer.start(
"Median");
143 if (!param.postprocess_only_left)
151 free(disparity_grid_1);
152 free(disparity_grid_2);
168 void Elas::removeInconsistentSupportPoints (int16_t* D_can,int32_t D_can_width,int32_t D_can_height) {
171 for (int32_t u_can=0; u_can<D_can_width; u_can++) {
172 for (int32_t v_can=0; v_can<D_can_height; v_can++) {
173 int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
178 for (int32_t u_can_2=u_can-param.incon_window_size; u_can_2<=u_can+param.incon_window_size; u_can_2++) {
179 for (int32_t v_can_2=v_can-param.incon_window_size; v_can_2<=v_can+param.incon_window_size; v_can_2++) {
180 if (u_can_2>=0 && v_can_2>=0 && u_can_2<D_can_width && v_can_2<D_can_height) {
181 int16_t d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
182 if (d_can_2>=0 && abs(d_can-d_can_2)<=param.incon_threshold)
189 if (support<param.incon_min_support)
190 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
196 void Elas::removeRedundantSupportPoints(int16_t* D_can,int32_t D_can_width,int32_t D_can_height,
197 int32_t redun_max_dist, int32_t redun_threshold,
bool vertical) {
200 int32_t redun_dir_u[2] = {0,0};
201 int32_t redun_dir_v[2] = {0,0};
211 for (int32_t u_can=0; u_can<D_can_width; u_can++) {
212 for (int32_t v_can=0; v_can<D_can_height; v_can++) {
213 int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
217 bool redundant =
true;
218 for (int32_t i=0; i<2; i++) {
221 int32_t u_can_2 = u_can;
222 int32_t v_can_2 = v_can;
224 bool support =
false;
225 for (int32_t j=0; j<redun_max_dist; j++) {
226 u_can_2 += redun_dir_u[i];
227 v_can_2 += redun_dir_v[i];
228 if (u_can_2<0 || v_can_2<0 || u_can_2>=D_can_width || v_can_2>=D_can_height)
230 d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
231 if (d_can_2>=0 && abs(d_can-d_can_2)<=redun_threshold) {
246 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
252 void Elas::addCornerSupportPoints(vector<support_pt> &p_support) {
255 vector<support_pt> p_border;
256 p_border.push_back(support_pt(0,0,0));
257 p_border.push_back(support_pt(0,height-1,0));
258 p_border.push_back(support_pt(width-1,0,0));
259 p_border.push_back(support_pt(width-1,height-1,0));
262 for (int32_t i=0; i<p_border.size(); i++) {
263 int32_t best_dist = 10000000;
264 for (int32_t j=0; j<p_support.size(); j++) {
265 int32_t du = p_border[i].u-p_support[j].u;
266 int32_t dv = p_border[i].v-p_support[j].v;
267 int32_t curr_dist = du*du+dv*dv;
268 if (curr_dist<best_dist) {
269 best_dist = curr_dist;
270 p_border[i].d = p_support[j].d;
276 p_border.push_back(support_pt(p_border[2].u+p_border[2].d,p_border[2].v,p_border[2].d));
277 p_border.push_back(support_pt(p_border[3].u+p_border[3].d,p_border[3].v,p_border[3].d));
280 for (int32_t i=0; i<p_border.size(); i++)
281 p_support.push_back(p_border[i]);
284 inline int16_t Elas::computeMatchingDisparity (
const int32_t &u,
const int32_t &v,uint8_t* I1_desc,uint8_t* I2_desc,
const bool &right_image) {
286 const int32_t u_step = 2;
287 const int32_t v_step = 2;
288 const int32_t window_size = 3;
290 int32_t desc_offset_1 = -16*u_step-16*width*v_step;
291 int32_t desc_offset_2 = +16*u_step-16*width*v_step;
292 int32_t desc_offset_3 = -16*u_step+16*width*v_step;
293 int32_t desc_offset_4 = +16*u_step+16*width*v_step;
295 __m128i xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
298 if (u>=window_size+u_step && u<=width-window_size-1-u_step && v>=window_size+v_step && v<=height-window_size-1-v_step) {
301 int32_t line_offset = 16*width*v;
302 uint8_t *I1_line_addr,*I2_line_addr;
304 I1_line_addr = I1_desc+line_offset;
305 I2_line_addr = I2_desc+line_offset;
307 I1_line_addr = I2_desc+line_offset;
308 I2_line_addr = I1_desc+line_offset;
312 uint8_t* I1_block_addr = I1_line_addr+16*u;
313 uint8_t* I2_block_addr;
317 for (int32_t i=0; i<16; i++)
318 sum += abs((int32_t)(*(I1_block_addr+i))-128);
319 if (sum<param.support_texture)
323 xmm1 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_1));
324 xmm2 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_2));
325 xmm3 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_3));
326 xmm4 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_4));
332 int16_t min_1_E = 32767;
333 int16_t min_1_d = -1;
334 int16_t min_2_E = 32767;
335 int16_t min_2_d = -1;
338 int32_t disp_min_valid = max(param.disp_min,0);
339 int32_t disp_max_valid = param.disp_max;
340 if (!right_image) disp_max_valid = min(param.disp_max,u-window_size-u_step);
341 else disp_max_valid = min(param.disp_max,width-u-window_size-u_step);
344 if (disp_max_valid-disp_min_valid<10)
348 for (int16_t d=disp_min_valid; d<=disp_max_valid; d++) {
351 if (!right_image) u_warp = u-d;
355 I2_block_addr = I2_line_addr+16*u_warp;
358 xmm6 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_1));
359 xmm6 = _mm_sad_epu8(xmm1,xmm6);
360 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_2));
361 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm2,xmm5),xmm6);
362 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_3));
363 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm3,xmm5),xmm6);
364 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_4));
365 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm4,xmm5),xmm6);
366 sum = _mm_extract_epi16(xmm6,0)+_mm_extract_epi16(xmm6,4);
374 }
else if (sum<min_2_E) {
381 if (min_1_d>=0 && min_2_d>=0 && (
float)min_1_E<param.support_threshold*(
float)min_2_E)
390 vector<Elas::support_pt> Elas::computeSupportMatches (uint8_t* I1_desc,uint8_t* I2_desc) {
394 int32_t D_candidate_stepsize = param.candidate_stepsize;
395 if (param.subsampling)
396 D_candidate_stepsize += D_candidate_stepsize%2;
399 int32_t D_can_width = 0;
400 int32_t D_can_height = 0;
401 for (int32_t u=0; u<width; u+=D_candidate_stepsize) D_can_width++;
402 for (int32_t v=0; v<height; v+=D_candidate_stepsize) D_can_height++;
403 int16_t* D_can = (int16_t*)calloc(D_can_width*D_can_height,
sizeof(int16_t));
410 for (int32_t u_can=1; u_can<D_can_width; u_can++) {
411 u = u_can*D_candidate_stepsize;
412 for (int32_t v_can=1; v_can<D_can_height; v_can++) {
413 v = v_can*D_candidate_stepsize;
416 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
419 d = computeMatchingDisparity(u,v,I1_desc,I2_desc,
false);
423 d2 = computeMatchingDisparity(u-d,v,I1_desc,I2_desc,
true);
424 if (d2>=0 && abs(d-d2)<=param.lr_threshold)
425 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = d;
431 removeInconsistentSupportPoints(D_can,D_can_width,D_can_height);
436 removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,
true);
437 removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,
false);
440 vector<support_pt> p_support;
441 for (int32_t u_can=1; u_can<D_can_width; u_can++)
442 for (int32_t v_can=1; v_can<D_can_height; v_can++)
443 if (*(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))>=0)
444 p_support.push_back(support_pt(u_can*D_candidate_stepsize,
445 v_can*D_candidate_stepsize,
446 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))));
450 if (param.add_corners)
451 addCornerSupportPoints(p_support);
460 vector<Elas::triangle> Elas::computeDelaunayTriangulation (vector<support_pt> p_support,int32_t right_image) {
463 struct triangulateio in, out;
467 in.numberofpoints = p_support.size();
468 in.pointlist = (
float*)malloc(in.numberofpoints*2*
sizeof(
float));
471 for (int32_t i=0; i<p_support.size(); i++) {
472 in.pointlist[k++] = p_support[i].u;
473 in.pointlist[k++] = p_support[i].v;
476 for (int32_t i=0; i<p_support.size(); i++) {
477 in.pointlist[k++] = p_support[i].u-p_support[i].d;
478 in.pointlist[k++] = p_support[i].v;
481 in.numberofpointattributes = 0;
482 in.pointattributelist = NULL;
483 in.pointmarkerlist = NULL;
484 in.numberofsegments = 0;
485 in.numberofholes = 0;
486 in.numberofregions = 0;
487 in.regionlist = NULL;
490 out.pointlist = NULL;
491 out.pointattributelist = NULL;
492 out.pointmarkerlist = NULL;
493 out.trianglelist = NULL;
494 out.triangleattributelist = NULL;
495 out.neighborlist = NULL;
496 out.segmentlist = NULL;
497 out.segmentmarkerlist = NULL;
499 out.edgemarkerlist = NULL;
502 char parameters[] =
"zQB";
503 triangulate(parameters, &in, &out, NULL);
506 vector<triangle> tri;
508 for (int32_t i=0; i<out.numberoftriangles; i++) {
509 tri.push_back(triangle(out.trianglelist[k],out.trianglelist[k+1],out.trianglelist[k+2]));
516 free(out.trianglelist);
522 void Elas::computeDisparityPlanes (vector<support_pt> p_support,vector<triangle> &tri,int32_t right_image) {
529 for (int32_t i=0; i<tri.size(); i++) {
532 int32_t c1 = tri[i].c1;
533 int32_t c2 = tri[i].c2;
534 int32_t c3 = tri[i].c3;
537 A.val[0][0] = p_support[c1].u;
538 A.val[1][0] = p_support[c2].u;
539 A.val[2][0] = p_support[c3].u;
540 A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
541 A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
542 A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
545 b.val[0][0] = p_support[c1].d;
546 b.val[1][0] = p_support[c2].d;
547 b.val[2][0] = p_support[c3].d;
553 tri[i].t1a = b.val[0][0];
554 tri[i].t1b = b.val[1][0];
555 tri[i].t1c = b.val[2][0];
565 A.val[0][0] = p_support[c1].u-p_support[c1].d;
566 A.val[1][0] = p_support[c2].u-p_support[c2].d;
567 A.val[2][0] = p_support[c3].u-p_support[c3].d;
568 A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
569 A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
570 A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
573 b.val[0][0] = p_support[c1].d;
574 b.val[1][0] = p_support[c2].d;
575 b.val[2][0] = p_support[c3].d;
581 tri[i].t2a = b.val[0][0];
582 tri[i].t2b = b.val[1][0];
583 tri[i].t2c = b.val[2][0];
594 void Elas::createGrid(vector<support_pt> p_support,int32_t* disparity_grid,int32_t* grid_dims,
bool right_image) {
597 int32_t grid_width = grid_dims[1];
598 int32_t grid_height = grid_dims[2];
601 int32_t* temp1 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,
sizeof(int32_t));
602 int32_t* temp2 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,
sizeof(int32_t));
605 for (int32_t i=0; i<p_support.size(); i++) {
608 int32_t x_curr = p_support[i].u;
609 int32_t y_curr = p_support[i].v;
610 int32_t d_curr = p_support[i].d;
611 int32_t d_min = max(d_curr-1,0);
612 int32_t d_max = min(d_curr+1,param.disp_max);
615 for (int32_t d=d_min; d<=d_max; d++) {
618 x = floor((
float)(x_curr/param.grid_size));
620 x = floor((
float)(x_curr-d_curr)/(
float)param.grid_size);
621 int32_t y = floor((
float)y_curr/(
float)param.grid_size);
624 if (x>=0 && x<grid_width &&y>=0 && y<grid_height) {
625 int32_t addr = getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1);
632 const int32_t* tl = temp1 + (0*grid_width+0)*(param.disp_max+1);
633 const int32_t* tc = temp1 + (0*grid_width+1)*(param.disp_max+1);
634 const int32_t* tr = temp1 + (0*grid_width+2)*(param.disp_max+1);
635 const int32_t* cl = temp1 + (1*grid_width+0)*(param.disp_max+1);
636 const int32_t* cc = temp1 + (1*grid_width+1)*(param.disp_max+1);
637 const int32_t* cr = temp1 + (1*grid_width+2)*(param.disp_max+1);
638 const int32_t* bl = temp1 + (2*grid_width+0)*(param.disp_max+1);
639 const int32_t* bc = temp1 + (2*grid_width+1)*(param.disp_max+1);
640 const int32_t* br = temp1 + (2*grid_width+2)*(param.disp_max+1);
642 int32_t* result = temp2 + (1*grid_width+1)*(param.disp_max+1);
643 int32_t* end_input = temp1 + grid_width*grid_height*(param.disp_max+1);
646 for( ; br != end_input; tl++, tc++, tr++, cl++, cc++, cr++, bl++, bc++, br++, result++ )
647 *result = *tl | *tc | *tr | *cl | *cc | *cr | *bl | *bc | *br;
650 for (int32_t x=0; x<grid_width; x++) {
651 for (int32_t y=0; y<grid_height; y++) {
654 int32_t curr_ind = 1;
657 for (int32_t d=0; d<=param.disp_max; d++) {
660 if (*(temp2+getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1))>0) {
661 *(disparity_grid+getAddressOffsetGrid(x,y,curr_ind,grid_width,param.disp_max+2))=d;
667 *(disparity_grid+getAddressOffsetGrid(x,y,0,grid_width,param.disp_max+2))=curr_ind-1;
676 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,
const int32_t &d,
const int32_t &w,
677 const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
678 xmm2 = _mm_load_si128(I2_block_addr);
679 xmm2 = _mm_sad_epu8(xmm1,xmm2);
680 val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4)+w;
687 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,
const int32_t &d,
688 const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
689 xmm2 = _mm_load_si128(I2_block_addr);
690 xmm2 = _mm_sad_epu8(xmm1,xmm2);
691 val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
698 inline void Elas::findMatch(int32_t &u,int32_t &v,
float &plane_a,
float &plane_b,
float &plane_c,
699 int32_t* disparity_grid,int32_t *grid_dims,uint8_t* I1_desc,uint8_t* I2_desc,
700 int32_t *P,int32_t &plane_radius,
bool &valid,
bool &right_image,
float* D){
703 const int32_t disp_num = grid_dims[0]-1;
704 const int32_t window_size = 2;
708 if (param.subsampling) d_addr = getAddressOffsetImage(u/2,v/2,width/2);
709 else d_addr = getAddressOffsetImage(u,v,width);
712 if (u<window_size || u>=width-window_size)
716 int32_t line_offset = 16*width*max(min(v,height-3),2);
717 uint8_t *I1_line_addr,*I2_line_addr;
719 I1_line_addr = I1_desc+line_offset;
720 I2_line_addr = I2_desc+line_offset;
722 I1_line_addr = I2_desc+line_offset;
723 I2_line_addr = I1_desc+line_offset;
727 uint8_t* I1_block_addr = I1_line_addr+16*u;
731 for (int32_t i=0; i<16; i++)
732 sum += abs((int32_t)(*(I1_block_addr+i))-128);
733 if (sum<param.match_texture)
737 int32_t d_plane = (int32_t)(plane_a*(
float)u+plane_b*(float)v+plane_c);
738 int32_t d_plane_min = max(d_plane-plane_radius,0);
739 int32_t d_plane_max = min(d_plane+plane_radius,disp_num-1);
742 int32_t grid_x = (int32_t)floor((
float)u/(float)param.grid_size);
743 int32_t grid_y = (int32_t)floor((
float)v/(float)param.grid_size);
744 uint32_t grid_addr = getAddressOffsetGrid(grid_x,grid_y,0,grid_dims[1],grid_dims[0]);
745 int32_t num_grid = *(disparity_grid+grid_addr);
746 int32_t* d_grid = disparity_grid+grid_addr+1;
749 int32_t d_curr, u_warp, val;
750 int32_t min_val = 10000;
752 __m128i xmm1 = _mm_load_si128((__m128i*)I1_block_addr);
757 for (int32_t i=0; i<num_grid; i++) {
759 if (d_curr<d_plane_min || d_curr>d_plane_max) {
761 if (u_warp<window_size || u_warp>=width-window_size)
763 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
766 for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
768 if (u_warp<window_size || u_warp>=width-window_size)
770 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
775 for (int32_t i=0; i<num_grid; i++) {
777 if (d_curr<d_plane_min || d_curr>d_plane_max) {
779 if (u_warp<window_size || u_warp>=width-window_size)
781 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
784 for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
786 if (u_warp<window_size || u_warp>=width-window_size)
788 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
793 if (min_d>=0) *(D+d_addr) = min_d;
794 else *(D+d_addr) = -1;
798 void Elas::computeDisparity(vector<support_pt> p_support,vector<triangle> tri,int32_t* disparity_grid,int32_t *grid_dims,
799 uint8_t* I1_desc,uint8_t* I2_desc,
bool right_image,
float* D) {
802 const int32_t disp_num = grid_dims[0]-1;
805 int32_t window_size = 2;
808 if (param.subsampling) {
809 for (int32_t i=0; i<(width/2)*(height/2); i++)
812 for (int32_t i=0; i<width*height; i++)
817 float two_sigma_squared = 2*param.sigma*param.sigma;
818 int32_t* P =
new int32_t[disp_num];
819 for (int32_t delta_d=0; delta_d<disp_num; delta_d++)
820 P[delta_d] = (int32_t)((-log(param.gamma+exp(-delta_d*delta_d/two_sigma_squared))+log(param.gamma))/param.beta);
821 int32_t plane_radius = (int32_t)max((
float)ceil(param.sigma*param.sradius),(float)2.0);
825 float plane_a,plane_b,plane_c,plane_d;
828 for (uint32_t i=0; i<tri.size(); i++) {
833 plane_a = tri[i].t1a;
834 plane_b = tri[i].t1b;
835 plane_c = tri[i].t1c;
836 plane_d = tri[i].t2a;
838 plane_a = tri[i].t2a;
839 plane_b = tri[i].t2b;
840 plane_c = tri[i].t2c;
841 plane_d = tri[i].t1a;
852 tri_u[0] = p_support[c1].u;
853 tri_u[1] = p_support[c2].u;
854 tri_u[2] = p_support[c3].u;
856 tri_u[0] = p_support[c1].u-p_support[c1].d;
857 tri_u[1] = p_support[c2].u-p_support[c2].d;
858 tri_u[2] = p_support[c3].u-p_support[c3].d;
860 float tri_v[3] = {p_support[c1].v,p_support[c2].v,p_support[c3].v};
862 for (uint32_t j=0; j<3; j++) {
863 for (uint32_t k=0; k<j; k++) {
864 if (tri_u[k]>tri_u[j]) {
865 float tri_u_temp = tri_u[j]; tri_u[j] = tri_u[k]; tri_u[k] = tri_u_temp;
866 float tri_v_temp = tri_v[j]; tri_v[j] = tri_v[k]; tri_v[k] = tri_v_temp;
872 float A_u = tri_u[0];
float A_v = tri_v[0];
873 float B_u = tri_u[1];
float B_v = tri_v[1];
874 float C_u = tri_u[2];
float C_v = tri_v[2];
877 float AB_a = 0;
float AC_a = 0;
float BC_a = 0;
878 if ((int32_t)(A_u)!=(int32_t)(B_u)) AB_a = (A_v-B_v)/(A_u-B_u);
879 if ((int32_t)(A_u)!=(int32_t)(C_u)) AC_a = (A_v-C_v)/(A_u-C_u);
880 if ((int32_t)(B_u)!=(int32_t)(C_u)) BC_a = (B_v-C_v)/(B_u-C_u);
881 float AB_b = A_v-AB_a*A_u;
882 float AC_b = A_v-AC_a*A_u;
883 float BC_b = B_v-BC_a*B_u;
887 bool valid = fabs(plane_a)<0.7 && fabs(plane_d)<0.7;
890 if ((int32_t)(A_u)!=(int32_t)(B_u)) {
891 for (int32_t u=max((int32_t)A_u,0); u<min((int32_t)B_u,width); u++){
892 if (!param.subsampling || u%2==0) {
893 int32_t v_1 = (uint32_t)(AC_a*(
float)u+AC_b);
894 int32_t v_2 = (uint32_t)(AB_a*(
float)u+AB_b);
895 for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
896 if (!param.subsampling || v%2==0) {
897 findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
898 I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
905 if ((int32_t)(B_u)!=(int32_t)(C_u)) {
906 for (int32_t u=max((int32_t)B_u,0); u<min((int32_t)C_u,width); u++){
907 if (!param.subsampling || u%2==0) {
908 int32_t v_1 = (uint32_t)(AC_a*(
float)u+AC_b);
909 int32_t v_2 = (uint32_t)(BC_a*(
float)u+BC_b);
910 for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
911 if (!param.subsampling || v%2==0) {
912 findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
913 I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
924 void Elas::leftRightConsistencyCheck(
float* D1,
float* D2) {
927 int32_t D_width = width;
928 int32_t D_height = height;
929 if (param.subsampling) {
935 float* D1_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
936 float* D2_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
937 memcpy(D1_copy,D1,D_width*D_height*
sizeof(
float));
938 memcpy(D2_copy,D2,D_width*D_height*
sizeof(
float));
941 uint32_t addr,addr_warp;
942 float u_warp_1,u_warp_2,d1,d2;
945 for (int32_t u=0; u<D_width; u++) {
946 for (int32_t v=0; v<D_height; v++) {
949 addr = getAddressOffsetImage(u,v,D_width);
950 d1 = *(D1_copy+addr);
951 d2 = *(D2_copy+addr);
952 if (param.subsampling) {
953 u_warp_1 = (float)u-d1/2;
954 u_warp_2 = (float)u+d2/2;
956 u_warp_1 = (float)u-d1;
957 u_warp_2 = (float)u+d2;
962 if (d1>=0 && u_warp_1>=0 && u_warp_1<D_width) {
965 addr_warp = getAddressOffsetImage((int32_t)u_warp_1,v,D_width);
968 if (fabs(*(D2_copy+addr_warp)-d1)>param.lr_threshold)
976 if (d2>=0 && u_warp_2>=0 && u_warp_2<D_width) {
979 addr_warp = getAddressOffsetImage((int32_t)u_warp_2,v,D_width);
982 if (fabs(*(D1_copy+addr_warp)-d2)>param.lr_threshold)
996 void Elas::removeSmallSegments (
float* D) {
999 int32_t D_width = width;
1000 int32_t D_height = height;
1001 int32_t D_speckle_size = param.speckle_size;
1002 if (param.subsampling) {
1004 D_height = height/2;
1005 D_speckle_size = sqrt((
float)param.speckle_size)*2;
1009 int32_t *D_done = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1010 int32_t *seg_list_u = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1011 int32_t *seg_list_v = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1012 int32_t seg_list_count;
1013 int32_t seg_list_curr;
1014 int32_t u_neighbor[4];
1015 int32_t v_neighbor[4];
1020 int32_t addr_start, addr_curr, addr_neighbor;
1023 for (int32_t u=0; u<D_width; u++) {
1024 for (int32_t v=0; v<D_height; v++) {
1027 addr_start = getAddressOffsetImage(u,v,D_width);
1030 if (*(D_done+addr_start)==0) {
1034 *(seg_list_u+0) = u;
1035 *(seg_list_v+0) = v;
1042 while (seg_list_curr<seg_list_count) {
1045 u_seg_curr = *(seg_list_u+seg_list_curr);
1046 v_seg_curr = *(seg_list_v+seg_list_curr);
1049 addr_curr = getAddressOffsetImage(u_seg_curr,v_seg_curr,D_width);
1052 u_neighbor[0] = u_seg_curr-1; v_neighbor[0] = v_seg_curr;
1053 u_neighbor[1] = u_seg_curr+1; v_neighbor[1] = v_seg_curr;
1054 u_neighbor[2] = u_seg_curr; v_neighbor[2] = v_seg_curr-1;
1055 u_neighbor[3] = u_seg_curr; v_neighbor[3] = v_seg_curr+1;
1058 for (int32_t i=0; i<4; i++) {
1061 if (u_neighbor[i]>=0 && v_neighbor[i]>=0 && u_neighbor[i]<D_width && v_neighbor[i]<D_height) {
1064 addr_neighbor = getAddressOffsetImage(u_neighbor[i],v_neighbor[i],D_width);
1067 if (*(D_done+addr_neighbor)==0 && *(D+addr_neighbor)>=0) {
1071 if (fabs(*(D+addr_curr)-*(D+addr_neighbor))<=param.speckle_sim_threshold) {
1074 *(seg_list_u+seg_list_count) = u_neighbor[i];
1075 *(seg_list_v+seg_list_count) = v_neighbor[i];
1081 *(D_done+addr_neighbor) = 1;
1092 *(D_done+addr_curr) = 1;
1097 if (seg_list_count<D_speckle_size) {
1100 for (int32_t i=0; i<seg_list_count; i++) {
1101 addr_curr = getAddressOffsetImage(*(seg_list_u+i),*(seg_list_v+i),D_width);
1102 *(D+addr_curr) = -10;
1116 void Elas::gapInterpolation(
float* D) {
1119 int32_t D_width = width;
1120 int32_t D_height = height;
1121 int32_t D_ipol_gap_width = param.ipol_gap_width;
1122 if (param.subsampling) {
1124 D_height = height/2;
1125 D_ipol_gap_width = param.ipol_gap_width/2+1;
1129 float discon_threshold = 3.0;
1132 int32_t count,addr,v_first,v_last,u_first,u_last;
1137 for (int32_t v=0; v<D_height; v++) {
1143 for (int32_t u=0; u<D_width; u++) {
1146 addr = getAddressOffsetImage(u,v,D_width);
1152 if (count>=1 && count<=D_ipol_gap_width) {
1159 if (u_first>0 && u_last<D_width-1) {
1162 d1 = *(D+getAddressOffsetImage(u_first-1,v,D_width));
1163 d2 = *(D+getAddressOffsetImage(u_last+1,v,D_width));
1164 if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1165 else d_ipol = min(d1,d2);
1168 for (int32_t u_curr=u_first; u_curr<=u_last; u_curr++)
1169 *(D+getAddressOffsetImage(u_curr,v,D_width)) = d_ipol;
1184 if (param.add_corners) {
1187 for (int32_t u=0; u<D_width; u++) {
1190 addr = getAddressOffsetImage(u,v,D_width);
1194 for (int32_t u2=max(u-D_ipol_gap_width,0); u2<u; u2++)
1195 *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1201 for (int32_t u=D_width-1; u>=0; u--) {
1204 addr = getAddressOffsetImage(u,v,D_width);
1208 for (int32_t u2=u; u2<=min(u+D_ipol_gap_width,D_width-1); u2++)
1209 *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1218 for (int32_t u=0; u<D_width; u++) {
1224 for (int32_t v=0; v<D_height; v++) {
1227 addr = getAddressOffsetImage(u,v,D_width);
1233 if (count>=1 && count<=D_ipol_gap_width) {
1240 if (v_first>0 && v_last<D_height-1) {
1243 d1 = *(D+getAddressOffsetImage(u,v_first-1,D_width));
1244 d2 = *(D+getAddressOffsetImage(u,v_last+1,D_width));
1245 if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1246 else d_ipol = min(d1,d2);
1249 for (int32_t v_curr=v_first; v_curr<=v_last; v_curr++)
1250 *(D+getAddressOffsetImage(u,v_curr,D_width)) = d_ipol;
1268 if (param.add_corners) {
1271 for (int32_t v=0; v<D_height; v++) {
1274 addr = getAddressOffsetImage(u,v,D_width);
1278 for (int32_t v2=max(v-D_ipol_gap_width,0); v2<v; v2++)
1279 *(D+getAddressOffsetImage(u,v2,D_width)) = *(D+addr);
1285 for (int32_t v=D_height-1; v>=0; v--) {
1288 addr = getAddressOffsetImage(u,v,D_width);
1292 for (int32_t v2=v; v2<=min(v+D_ipol_gap_width,D_height-1); v2++)
1293 *(D+getAddressOffsetImage(u,v2,D_width)) = *(D+addr);
1302 void Elas::adaptiveMean (
float* D) {
1305 int32_t D_width = width;
1306 int32_t D_height = height;
1307 if (param.subsampling) {
1309 D_height = height/2;
1313 float* D_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
1314 float* D_tmp = (
float*)malloc(D_width*D_height*
sizeof(
float));
1315 memcpy(D_copy,D,D_width*D_height*
sizeof(
float));
1319 for (int32_t i=0; i<D_width*D_height; i++) {
1326 __m128 xconst0 = _mm_set1_ps(0);
1327 __m128 xconst4 = _mm_set1_ps(4);
1328 __m128 xval,xweight1,xweight2,xfactor1,xfactor2;
1330 float *val = (
float *)_mm_malloc(8*
sizeof(
float),16);
1331 float *weight = (
float*)_mm_malloc(4*
sizeof(
float),16);
1332 float *factor = (
float*)_mm_malloc(4*
sizeof(
float),16);
1335 __m128 xabsmask = _mm_set1_ps(0x7FFFFFFF);
1338 if (param.subsampling) {
1341 for (int32_t v=3; v<D_height-3; v++) {
1344 for (int32_t u=0; u<3; u++)
1345 val[u] = *(D_copy+v*D_width+u);
1348 for (int32_t u=3; u<D_width; u++) {
1351 float val_curr = *(D_copy+v*D_width+(u-1));
1352 val[u%4] = *(D_copy+v*D_width+u);
1354 xval = _mm_load_ps(val);
1355 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1356 xweight1 = _mm_and_ps(xweight1,xabsmask);
1357 xweight1 = _mm_sub_ps(xconst4,xweight1);
1358 xweight1 = _mm_max_ps(xconst0,xweight1);
1359 xfactor1 = _mm_mul_ps(xval,xweight1);
1361 _mm_store_ps(weight,xweight1);
1362 _mm_store_ps(factor,xfactor1);
1364 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1365 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1368 float d = factor_sum/weight_sum;
1369 if (d>=0) *(D_tmp+v*D_width+(u-1)) = d;
1375 for (int32_t u=3; u<D_width-3; u++) {
1378 for (int32_t v=0; v<3; v++)
1379 val[v] = *(D_tmp+v*D_width+u);
1382 for (int32_t v=3; v<D_height; v++) {
1385 float val_curr = *(D_tmp+(v-1)*D_width+u);
1386 val[v%4] = *(D_tmp+v*D_width+u);
1388 xval = _mm_load_ps(val);
1389 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1390 xweight1 = _mm_and_ps(xweight1,xabsmask);
1391 xweight1 = _mm_sub_ps(xconst4,xweight1);
1392 xweight1 = _mm_max_ps(xconst0,xweight1);
1393 xfactor1 = _mm_mul_ps(xval,xweight1);
1395 _mm_store_ps(weight,xweight1);
1396 _mm_store_ps(factor,xfactor1);
1398 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1399 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1402 float d = factor_sum/weight_sum;
1403 if (d>=0) *(D+(v-1)*D_width+u) = d;
1413 for (int32_t v=3; v<D_height-3; v++) {
1416 for (int32_t u=0; u<7; u++)
1417 val[u] = *(D_copy+v*D_width+u);
1420 for (int32_t u=7; u<D_width; u++) {
1423 float val_curr = *(D_copy+v*D_width+(u-3));
1424 val[u%8] = *(D_copy+v*D_width+u);
1426 xval = _mm_load_ps(val);
1427 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1428 xweight1 = _mm_and_ps(xweight1,xabsmask);
1429 xweight1 = _mm_sub_ps(xconst4,xweight1);
1430 xweight1 = _mm_max_ps(xconst0,xweight1);
1431 xfactor1 = _mm_mul_ps(xval,xweight1);
1433 xval = _mm_load_ps(val+4);
1434 xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1435 xweight2 = _mm_and_ps(xweight2,xabsmask);
1436 xweight2 = _mm_sub_ps(xconst4,xweight2);
1437 xweight2 = _mm_max_ps(xconst0,xweight2);
1438 xfactor2 = _mm_mul_ps(xval,xweight2);
1440 xweight1 = _mm_add_ps(xweight1,xweight2);
1441 xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1443 _mm_store_ps(weight,xweight1);
1444 _mm_store_ps(factor,xfactor1);
1446 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1447 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1450 float d = factor_sum/weight_sum;
1451 if (d>=0) *(D_tmp+v*D_width+(u-3)) = d;
1457 for (int32_t u=3; u<D_width-3; u++) {
1460 for (int32_t v=0; v<7; v++)
1461 val[v] = *(D_tmp+v*D_width+u);
1464 for (int32_t v=7; v<D_height; v++) {
1467 float val_curr = *(D_tmp+(v-3)*D_width+u);
1468 val[v%8] = *(D_tmp+v*D_width+u);
1470 xval = _mm_load_ps(val);
1471 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1472 xweight1 = _mm_and_ps(xweight1,xabsmask);
1473 xweight1 = _mm_sub_ps(xconst4,xweight1);
1474 xweight1 = _mm_max_ps(xconst0,xweight1);
1475 xfactor1 = _mm_mul_ps(xval,xweight1);
1477 xval = _mm_load_ps(val+4);
1478 xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1479 xweight2 = _mm_and_ps(xweight2,xabsmask);
1480 xweight2 = _mm_sub_ps(xconst4,xweight2);
1481 xweight2 = _mm_max_ps(xconst0,xweight2);
1482 xfactor2 = _mm_mul_ps(xval,xweight2);
1484 xweight1 = _mm_add_ps(xweight1,xweight2);
1485 xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1487 _mm_store_ps(weight,xweight1);
1488 _mm_store_ps(factor,xfactor1);
1490 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1491 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1494 float d = factor_sum/weight_sum;
1495 if (d>=0) *(D+(v-3)*D_width+u) = d;
1509 void Elas::median (
float* D) {
1512 int32_t D_width = width;
1513 int32_t D_height = height;
1514 if (param.subsampling) {
1516 D_height = height/2;
1520 float *D_temp = (
float*)calloc(D_width*D_height,
sizeof(
float));
1522 int32_t window_size = 3;
1524 float *vals =
new float[window_size*2+1];
1529 for (int32_t u=window_size; u<D_width-window_size; u++) {
1530 for (int32_t v=window_size; v<D_height-window_size; v++) {
1531 if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1533 for (int32_t u2=u-window_size; u2<=u+window_size; u2++) {
1534 temp = *(D+getAddressOffsetImage(u2,v,D_width));
1536 while (i>=0 && *(vals+i)>temp) {
1537 *(vals+i+1) = *(vals+i);
1543 *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1545 *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1552 for (int32_t u=window_size; u<D_width-window_size; u++) {
1553 for (int32_t v=window_size; v<D_height-window_size; v++) {
1554 if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1556 for (int32_t v2=v-window_size; v2<=v+window_size; v2++) {
1557 temp = *(D_temp+getAddressOffsetImage(u,v2,D_width));
1559 while (i>=0 && *(vals+i)>temp) {
1560 *(vals+i+1) = *(vals+i);
1566 *(D+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1568 *(D+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));