From d65d05c69bee764bb9e3ba57286402a89e6c9974 Mon Sep 17 00:00:00 2001 From: Guntram Blohm Date: Mon, 15 Jun 2020 18:04:04 +0200 Subject: [PATCH] Recognize shifted streaks, testcases --- src/mentionmatch/MatrixComparison.java | 101 +++++++++++-------- testcase_9/abr_wb_meawblom.csv | 5 + testcase_9/abr_wb_refwblom.csv | 5 + testcase_ralfreal_1/.abr_wb_meawblom.csv.swp | Bin 0 -> 16384 bytes testcase_ralfreal_1/.abr_wb_refwblom.csv.swp | Bin 0 -> 12288 bytes 5 files changed, 71 insertions(+), 40 deletions(-) create mode 100644 testcase_9/abr_wb_meawblom.csv create mode 100644 testcase_9/abr_wb_refwblom.csv create mode 100644 testcase_ralfreal_1/.abr_wb_meawblom.csv.swp create mode 100644 testcase_ralfreal_1/.abr_wb_refwblom.csv.swp diff --git a/src/mentionmatch/MatrixComparison.java b/src/mentionmatch/MatrixComparison.java index 58f7f6b..a0e21c3 100644 --- a/src/mentionmatch/MatrixComparison.java +++ b/src/mentionmatch/MatrixComparison.java @@ -134,7 +134,6 @@ class MatrixComparison { } dumpMatrix("tried to match"); -/* Forget about this now; this doesn't work. // There may still be an occasional swap, like with 0.1/0.0/0.35/0.25 above. // On the measurements, this looks like this: @@ -143,56 +142,78 @@ class MatrixComparison { // This may either be due to inconsistent stretching (in which case UX/VW // should be turned into UW/VX), or one new wire break (U) and an old one // that wasn't recognized (X). We assume it's inconsistent stretching and - // swap the matches if the rations AU/AW, UV/WX and VB/XB are close to 1.0, + // swap the matches if the rations MU/MW, UV/WX and VN/XN are close to 1.0, // and assume it's new/old breaks if they aren't. // Note we can also have something like // M---------------A-B-C-D----------------N // M-----------P-Q-R-S--------------------N // where we get AR/BS/CQ/DP but want AP/BQ/CR/DS. + // Algorithm: find a swapped pair of x/y values. Expand left and right + // to the surrounding rectangle as long as distances are small. + // Sort the y values within. Arrays.sort(matches); - float maxAllowedStretch = ratio(matches[matches.length-1].x, matches[matches.length-1].y)*1.1f; // allow 10% stretch - for (int i=0; i matches[i+1].y) { // i=B, i+1=C in above case - // find the last pair that has decrementing Y (DP) - int decrementingLength=1; - int minInvolvedY=matches[i+1].y; - while (i+decrementingLength < matches.length-1 - && matches[i].y > matches[i+decrementingLength+1].y) { - if (minInvolvedY > matches[i+decrementingLength+1].y) { - minInvolvedY = matches[i+decrementingLength+1].y; + + boolean foundShiftedStreaks; + do { + foundShiftedStreaks = false; + for (int i=0; i matches[i+1].y && xAxisVal[matches[i].x] > xAxisVal[matches[i+1].x] - 0.03) { + foundShiftedStreaks = true; + int minx = i; + int maxx = i+1; + int miny = matches[maxx].y; + int maxy = matches[minx].y; + boolean expanded; + do { + // find largest y difference between start/end spots + float deltax = 0; + for (int j=minx; j deltax) { + deltax = thisdelta; + } + } + // and allow 1.5 times this for the rest + deltax = deltax * 3 / 2; + // but only 3% of the whole length + if (deltax > 0.03f) { + deltax = 0.03f; + } + System.out.println("shifted streak found from "+minx+" to "+maxx+", miny="+miny+", maxy="+maxy+", maximum x delta is "+deltax); + expanded = false; + // expand right + while (maxx < matches.length-1 && matches[maxx+1].y < maxy && xAxisVal[maxx+1] < xAxisVal[maxx]+deltax) { + maxx++; + expanded = true; + if (miny > matches[maxx].y) { + miny = matches[maxx].y; + } + } + // expand left + while (minx > 0 && matches[minx-1].y > miny && xAxisVal[minx-1] > xAxisVal[minx]-deltax) { + minx--; + expanded = true; + if (maxy < matches[minx].y) { + maxy = matches[minx].y; + } + } + } while (expanded); + System.out.println("expansions ended"); + // As the X values are sorted anyway, sort the Y values as well, and assign them to the X values in order + int[] sortedY = new int[maxx-minx+1]; + for (int j=0; j= minInvolvedY - && matches[j].y <= matches[i].y) { - matchingYs++; + Arrays.sort(sortedY); + for (int j=0; jUsnr3o+GjICZ+; zJNKUdJ@AtGEws&2A`=&of^X2?>|MB^o zJ@{1=p1*v2u)H}xSzJ0f{_KDAt4o1ZMtUtAn5508&7&X@a- zpIua%Cl~XZd)JSjob8`J{boDZ{e6eP+Z2JP zmIvqBJ1b|6UiG^_@$u;gKk&%g6pWqOI|OzJ>=4)?utQ*nzz%^O0y_lWo(L>&+){lF z({Js}pLP6}4SriEYxBTov+?&%+_4=3I|OzJ>=4)?utQ*nzz%^O0y_kD2<#BpA+ST> ze-MHB)v9`#ef?M(e!l--_5VM5Usat#pMidMS5-X$Md;6WR@IBp73d!5)jO){+t9<% zN1)fz(C@o&<_|~+ylMJ{>6qp=nWqIBQ*Ruy>b0;e=*%# zTsu6P_%I2^CS#Iv>zcJZoi_EXt|w+9RNXv0;U)g%;E9Qshm38KylianG4nc3rqgMh zdCM#3(#Soy6!U0d(Vrw0qWNz*nlfw=M*GPimHb{1W^ zuuI0JQdPIGjaE-)W8rAxV4H)5u`v3^G%N_WmW_xUZMHb3a^c3~TU!?Pp$=Ot_CQoV zJy_UG8e7{2hg9BRxyZ(K3lE?Ub=}mZs?K5)lHIa!6Wj5H>xplZi`)$5DP`_cx3DiQ z_D#*Xo;_n%yLQ{cwHbxlu~75|eaa1%tL)ou;lUYupUl8r)@R)MvkQCULoByhLhag# zx5+zil>6FcxmiISON$#$e*j*$u&>h==klDO8(lccLbYvmK@9JBZL-B}Q!eaXumc;q zg+tSBS-5ejwWAA%iEG-x{Znq7%RDdaW=*?VxSknnn$COA1_aK^aV;G$3df<9Wh$CuT^pFLZ`^C z-51lmYf~EAkwrlWp>35%FN%>I?f1&TuBxPKDUw(Ll-2|f@ylqXf}g9~C>GYjie z=IvzKl!N0IMS=|>rj{MS8&M=7r_PE}VVbQCS94@XHcf1s>Y98e++d1w<3&*@!Z@eK zwW#BmqIP0Abx%GJPMFMczOtSpFHg-&3fPVj586Pi!!+gik8-16lVOYFmlzmEO~WLR zmUu1PI#G-qx>QXVUHg%3LYs21=)mq$?c(U_#9|6p#Iz`adkTk@yebqZ6Wm6nfOqj! zlQ`Se)55vpf-D!Su!9=J#!+OQC~#yZ<&L8fS@I8x?5tg1EX|!Qe=IM17aAIh7DarJ zX?@!a14$VSt<{BU+`im^;byA^{Cy3esgot&jU1`&+$K`9C|W73jb=_a1Cm~mAq3?_ zH9UzY4>^e%1(9f`nBuJhMV5<`FdHX9fhSLK6ziu&4r~WI{0vv^m-lsy1J~>vL6*k_ zdC_^&lW;=ITJD!mo2%n?L?4^PizEsO7`&QqSTl3QRW&XCJQmK37#(7R-wKavR*1oB zif}XhyElU?Z${0KorLJUpoapv`<>jI5ygzsc6k_ZYO$RJr|zBQ{-v;Ky8uKc=pLpt z$ucG=@^0J-=%}_tfs4e@EaUs?ig;3Qd)-8e&F!ve1byc@0OgTfpfWzL zijaFnmcnAp6-`!P6+9H?osq*Ol;`e&qGbaQ!Oj7iTH^`KD$_^g#kpAh;^eJ*uz1mh z=%%@Ha&XQViUZ}V&_crN^_Un(FQ#~sqq@p^R{HSKyq}E=&N?Nkg>%UJHnx=F@YaxM zS8|Vq-M~WjY$MTBc_h1w9J4q38Fff+gO8kPxpsktzHg%wEG?-=Fov)C5DvD}BheA; z-L_IuJWBNsqN7U>-3hh1(&#T%??HOyN6 zH&->Pyh2ZOaUJize{s5ha(F}jCw_AI@#C8pj;=1AnOt7-zw(m@$W%9IQ7;{zPAPbL>mzqB}b_~77lv7GQJzw!B_`GbegEUr#293322(*M6gqx_4m|6hH-|8x5O zBj}URU+DW^gdT%F4gH3MrmtN63UjRm5TRJ95PQQAGkW z4I@=^uMjzEL$Dc_L+pkMTXrO;M2)15=$I6Ba?;#?rd;94F|ft6ZSB>M;N*Oj!3@(H zCOCy@kxVdl{Pfner!G)^2uEj@*&&J;S*;c6n5vbosR;gBNMomuTgDk6#EmMRE>lIz zO)#9VEDUcb{Zw90j@Evc7f$Ky!jhP}x_FSIm{p|FIf{4z)kY&{01qTdmFZ;E;*F_- z56!@a)l?yLrQ~4Y#wR}*ehe(GC#`D|U0&uutMYn)v%dS9l^HB-8)vvxK{#BPTspv~ zT$mvom&7q$$eh@ra!$)k2L}r?kRk6Er`B1_+-J)f)2EM*C5f-CVYEaor<{6FIUCUn z$eH&Su*L_+nk9C~NX{gnf>U6)EQhY$>sxz)E-#je*qwx-cQ2 zF)gvmsoS&6wZQ^E4E*5Mj$PQ;@M6n&Bq^t-Q7&Ud6w&dl?-{9xc5vA46*-NVql&1l zN`(wi@{Xu~drU)1l{FGV==7NE#z-%&oO_{sHJl)$^oknq8JBHu9~2t`2U!&bw&q3U zs|-Rz+ADIk&Ex+qlOQq%Gcd+inKx=8_l(k6-DQ9gkByh`^(07gMS*e>fbxt{nzeINwz zVy%e6i6L3ex!KL|&R&rf?;6_?M}h*28w+;~HdJ0We&xkt-ZQKg%Er&|V#_o`6tq%s ztmOjP&4CQ1rA*UWMt#B)6`{&p1_flJe+_=@IySl-0|v39r9#kc(Bx@=a5L((9uPwjK5rjLcTm=kMdcgPsu=}p>>_Cw$&5KNtb!QB5#H=j$W^7I_m`L*w8Cp3pURPHP zbakAlU!AX&yL$0#iL4Go{?l{U%2X7S4_EV`vS;Jklj)0dFYwU7W)ous6j)ADc7#kO P&JI-G17j%5OGW8hmuB(N+9x-j7Au;Y5PNp~kRS$mQpsoh== z?p_2xffw=SK|FgDzkuGn=@(E058}m}K9yvqlOAEfgRsmSc=A+rzi%q_t3R2cGZXLc z><00+U^t#-?CsBg`QYzMzp>wMFm^Co`cKb2%G9OJ{4hN)@F6ZZ*{^c9O6zQ|uI^@Y z*GzUlIxXHQi+y*`xr4NJlijRzMOE&mZhkm%S>xtulTGV%nb%pmoXSZ$tEa2kw9fL? zY*`&`&zt+_-2r`11TH9ndoF)HzRIFdwoCo`%P$4bKX>PX!k}&w0U|&IhyW2F0z`la z5P^$Hz#VU~N9g(!efMAJeQU$}Dj5+V0z`la5CI}U1c(3;AOb{y2oM1x@E8(E7x>x# zDaJm5{O|uK|Nnpe3}cU=_n=pxzn*67d*}=32)YSffquTq*jJE)ZbFx#AFm(|%Ar@G zKXC1D`!?QzzX<*GBx7GfN05cKpdUD6pFvaTHR#XFjC}`v3f+dTLBC#N>>KDI^f5#S z5g-CYfCvzQ$B)3OsIy>(_t`$=QNpcEtPrYm8+WcG4}w5!3&8~sHBOY&gJA!O1Z52; z6Vdp08vp0iYL1u19Q#0aP?S6rJTk(Bx^orT(|Qo-_GTqE0m+9WClem&MEcF6b!;B3 zpWQr0SuvNwD3#dOwdy?#hg*(9Ypf{_%5)i&l?&=@ncYuI$E6R~)<)`#4|c4`rw?dK z(?%PmyR10uTt>r2!_6!otzv7?b9gK}x3L~(c`&24{GBN+lIV;N?$B?nqpUVM5~0VV zA|PsA7&~YS#5b~hKcY}${92EF+oIGTryYl=k48Sq|irA|f lz6ZaR&RtdWo#u9yznkYx<~T}S#v5gNt9Xzt_)eKu>>uN*6i@&F literal 0 HcmV?d00001