Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch diff-experimental Excluding Merge-Ins
This is equivalent to a diff from 98cf5c33bc to 5d836cbda7
2012-02-06
| ||
15:21 | Merge the diff enhancements from the diff-experimental branch into trunk. check-in: bba7aea8ca user: drh tags: trunk | |
15:02 | Tweak to side-by-side alignment: Be more aggressive about marking lines as changed if they are naturally aligned to begin with. Closed-Leaf check-in: 5d836cbda7 user: drh tags: diff-experimental | |
14:28 | Adjust the alignment similarity cutoff score. check-in: 9713e42356 user: drh tags: diff-experimental | |
01:55 | Trying out a greedy algorithm for aligning the two sides of a change with side-by-side diff. This helps in some cases, but we could probably benefit from a better algorithm. check-in: 881b65141b user: drh tags: diff-experimental | |
2012-02-05
| ||
20:22 | Add the "diff optimizer" which tries to shift inserts and deletes to align with natural boundaries in the text. The resulting diff is no more or less correct than the original; it just seems more natural to human readers. check-in: 98cf5c33bc user: drh tags: trunk | |
17:19 | Rearrange code and edit comments in diff logic, for clarity of presentation. No functional changes. check-in: 032da543f0 user: drh tags: trunk | |
Changes to src/diff.c.
353 353 ** Status of a single output line 354 354 */ 355 355 typedef struct SbsLine SbsLine; 356 356 struct SbsLine { 357 357 char *zLine; /* The output line under construction */ 358 358 int n; /* Index of next unused slot in the zLine[] */ 359 359 int width; /* Maximum width of a column in the output */ 360 - unsigned char escHtml; /* True to escape html characters */ 360 + unsigned char escHtml; /* True to escape html characters */ 361 + int iStart; /* Write zStart prior to character iStart */ 362 + const char *zStart; /* A <span> tag */ 363 + int iEnd; /* Write </span> prior to character iEnd */ 361 364 }; 362 365 363 366 /* 364 367 ** Flags for sbsWriteText() 365 368 */ 366 -#define SBS_NEWLINE 0x0001 /* End with \n\000 */ 367 -#define SBS_PAD 0x0002 /* Pad output to width spaces */ 368 -#define SBS_ENDSPAN 0x0004 /* Write a </span> after text */ 369 +#define SBS_NEWLINE 0x0001 /* End with \n\000 */ 370 +#define SBS_PAD 0x0002 /* Pad output to width spaces */ 369 371 370 372 /* 371 373 ** Write up to width characters of pLine into z[]. Translate tabs into 372 374 ** spaces. Add a newline if SBS_NEWLINE is set. Translate HTML characters 373 375 ** if SBS_HTML is set. Pad the rendering out width bytes if SBS_PAD is set. 374 376 */ 375 377 static void sbsWriteText(SbsLine *p, DLine *pLine, unsigned flags){ ................................................................................ 378 380 int j; /* Number of output characters generated */ 379 381 int k; /* Cursor position */ 380 382 const char *zIn = pLine->z; 381 383 char *z = &p->zLine[p->n]; 382 384 int w = p->width; 383 385 for(i=j=k=0; k<w && i<n; i++, k++){ 384 386 char c = zIn[i]; 387 + if( p->escHtml ){ 388 + if( i==p->iStart ){ 389 + int x = strlen(p->zStart); 390 + memcpy(z+j, p->zStart, x); 391 + j += x; 392 + }else if( i==p->iEnd ){ 393 + memcpy(z+j, "</span>", 7); 394 + j += 7; 395 + } 396 + } 385 397 if( c=='\t' ){ 386 398 z[j++] = ' '; 387 399 while( (k&7)!=7 && k<w ){ z[j++] = ' '; k++; } 388 400 }else if( c=='\r' || c=='\f' ){ 389 401 z[j++] = ' '; 390 402 }else if( c=='<' && p->escHtml ){ 391 403 memcpy(&z[j], "<", 4); ................................................................................ 396 408 }else if( c=='>' && p->escHtml ){ 397 409 memcpy(&z[j], ">", 4); 398 410 j += 4; 399 411 }else{ 400 412 z[j++] = c; 401 413 } 402 414 } 403 - if( (flags & SBS_ENDSPAN) && p->escHtml ){ 415 + if( p->escHtml && i<=p->iEnd ){ 404 416 memcpy(&z[j], "</span>", 7); 405 417 j += 7; 406 418 } 407 419 if( (flags & SBS_PAD)!=0 ){ 408 420 while( k<w ){ k++; z[j++] = ' '; } 409 421 } 410 422 if( flags & SBS_NEWLINE ){ ................................................................................ 442 454 sbsWriteHtml(p, "<span class=\"diffln\">"); 443 455 sqlite3_snprintf(7, &p->zLine[p->n], "%5d ", ln+1); 444 456 p->n += 6; 445 457 sbsWriteHtml(p, "</span>"); 446 458 p->zLine[p->n++] = ' '; 447 459 } 448 460 461 +/* 462 +** Write out lines that have been edited. Adjust the highlight to cover 463 +** only those parts of the line that actually changed. 464 +*/ 465 +static void sbsWriteLineChange( 466 + SbsLine *p, /* The SBS output line */ 467 + DLine *pLeft, /* Left line of the change */ 468 + int lnLeft, /* Line number for the left line */ 469 + DLine *pRight, /* Right line of the change */ 470 + int lnRight /* Line number of the right line */ 471 +){ 472 + int nLeft; /* Length of left line in bytes */ 473 + int nRight; /* Length of right line in bytes */ 474 + int nPrefix; /* Length of common prefix */ 475 + int nSuffix; /* Length of common suffix */ 476 + int width; /* Total column width */ 477 + const char *zLeft; /* Text of the left line */ 478 + const char *zRight; /* Text of the right line */ 479 + 480 + nLeft = pLeft->h & LENGTH_MASK; 481 + zLeft = pLeft->z; 482 + nRight = pRight->h & LENGTH_MASK; 483 + zRight = pRight->z; 484 + 485 + nPrefix = 0; 486 + while( nPrefix<nLeft && nPrefix<nRight && zLeft[nPrefix]==zRight[nPrefix] ){ 487 + nPrefix++; 488 + } 489 + nSuffix = 0; 490 + if( nPrefix<nLeft && nPrefix<nRight ){ 491 + while( nSuffix<nLeft && nSuffix<nRight 492 + && zLeft[nLeft-nSuffix-1]==zRight[nRight-nSuffix-1] ){ 493 + nSuffix++; 494 + } 495 + if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; 496 + } 497 + if( nPrefix+nSuffix > nLeft ) nSuffix = nLeft - nPrefix; 498 + if( nPrefix+nSuffix > nRight ) nSuffix = nRight - nPrefix; 499 + if( nPrefix+nSuffix==nLeft ){ 500 + /* Text inserted on the right */ 501 + sbsWriteLineno(p, lnLeft); 502 + p->iStart = p->iEnd = -1; 503 + sbsWriteText(p, pLeft, SBS_PAD); 504 + sbsWrite(p, " | ", 3); 505 + sbsWriteLineno(p, lnRight); 506 + p->iStart = nPrefix; 507 + p->iEnd = nRight - nSuffix; 508 + p->zStart = "<span class=\"diffadd\">"; 509 + sbsWriteText(p, pRight, SBS_NEWLINE); 510 + }else if( nPrefix+nSuffix==nRight ){ 511 + /* Text deleted from the left */ 512 + sbsWriteLineno(p, lnLeft); 513 + p->iStart = nPrefix; 514 + p->iEnd = nLeft - nSuffix; 515 + p->zStart = "<span class=\"diffrm\">"; 516 + sbsWriteText(p, pLeft, SBS_PAD); 517 + sbsWrite(p, " | ", 3); 518 + sbsWriteLineno(p, lnRight); 519 + p->iStart = p->iEnd = -1; 520 + sbsWriteText(p, pRight, SBS_NEWLINE); 521 + }else{ 522 + /* Text modified between left and right */ 523 + sbsWriteLineno(p, lnLeft); 524 + p->iStart = nPrefix; 525 + p->iEnd = nLeft - nSuffix; 526 + p->zStart = "<span class=\"diffchng\">"; 527 + sbsWriteText(p, pLeft, SBS_PAD); 528 + sbsWrite(p, " | ", 3); 529 + sbsWriteLineno(p, lnRight); 530 + p->iEnd = nRight - nSuffix; 531 + sbsWriteText(p, pRight, SBS_NEWLINE); 532 + } 533 +} 534 + 535 +/* 536 +** Return the number between 0 and 100 that is smaller the closer pA and 537 +** pB match. Return 0 for a perfect match. Return 100 if pA and pB are 538 +** completely different. 539 +*/ 540 +static int match_dline(DLine *pA, DLine *pB){ 541 + const char *zA; 542 + const char *zB; 543 + int nA; 544 + int nB; 545 + int minDist; 546 + int i; 547 + int nMatch; 548 + int score; 549 + 550 + zA = pA->z; 551 + zB = pB->z; 552 + nA = pA->h & LENGTH_MASK; 553 + nB = pB->h & LENGTH_MASK; 554 + while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; } 555 + while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } 556 + while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } 557 + while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } 558 + minDist = nA; 559 + if( minDist>nB ) minDist = nB; 560 + if( minDist==0 ){ 561 + score = 100; 562 + }else{ 563 + for(i=0; i<nA && i<nB && zA[i]==zB[i]; i++){} 564 + nMatch = i; 565 + for(i=1; i<=nA && i<=nB && zA[nA-i]==zB[nB-i]; i++){} 566 + nMatch += i-1; 567 + score = (nMatch >= minDist) ? 0 : ((minDist - nMatch)*100)/minDist; 568 + } 569 + return score; 570 +} 571 + 572 +/* 573 +** There is a change block in which nLeft lines of text on the left are 574 +** converted into nRight lines of text on the right. This routine computes 575 +** how the lines on the left line up with the lines on the right. 576 +** 577 +** The return value is a buffer of unsigned characters, obtained from 578 +** fossil_malloc(). (The caller needs to free the return value using 579 +** fossil_free().) Entries in the returned array have values as follows: 580 +** 581 +** 1. Delete the next line of pLeft. 582 +** 2. The next line of pLeft changes into the next line of pRight. 583 +** 3. Insert the next line of pRight. 584 +** 585 +** The length of the returned array will be just large enough to cause 586 +** all elements of pLeft and pRight to be consumed. 587 +** 588 +** Algorithm: Wagner's minimum edit-distance algorithm, modified by 589 +** adding a cost to each match based on how well the two rows match 590 +** each other. Insertion and deletion costs are 50. Match costs 591 +** are between 0 and 100 where 0 is a perfect match 100 is a complete 592 +** mismatch. 593 +*/ 594 +static unsigned char *sbsAlignment( 595 + DLine *aLeft, int nLeft, /* Text on the left */ 596 + DLine *aRight, int nRight /* Text on the right */ 597 +){ 598 + int i, j, k; /* Loop counters */ 599 + int *a; /* One row of the Wagner matrix */ 600 + int *pToFree; /* Space that needs to be freed */ 601 + unsigned char *aM; /* Wagner result matrix */ 602 + int aBuf[100]; /* Stack space for a[] if nRight not to big */ 603 + 604 + aM = fossil_malloc( (nLeft+1)*(nRight+1) ); 605 + if( nLeft==0 ){ 606 + memset(aM, 3, nRight); 607 + return aM; 608 + } 609 + if( nRight==0 ){ 610 + memset(aM, 1, nLeft); 611 + return aM; 612 + } 613 + if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){ 614 + pToFree = 0; 615 + a = aBuf; 616 + }else{ 617 + a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) ); 618 + } 619 + 620 + /* Compute the best alignment */ 621 + for(i=0; i<=nRight; i++){ 622 + aM[i] = 3; 623 + a[i] = i*50; 624 + } 625 + aM[0] = 0; 626 + for(j=1; j<=nLeft; j++){ 627 + int p = a[0]; 628 + a[0] = p+50; 629 + aM[j*(nRight+1)] = 1; 630 + for(i=1; i<=nRight; i++){ 631 + int m = a[i-1]+50; 632 + int d = 3; 633 + if( m>a[i]+50 ){ 634 + m = a[i]+50; 635 + d = 1; 636 + } 637 + if( m>p ){ 638 + int score = match_dline(&aLeft[j-1], &aRight[i-1]); 639 + if( (score<66 || (i<j+1 && i>j-1)) && m>p+score ){ 640 + m = p+score; 641 + d = 2; 642 + } 643 + } 644 + p = a[i]; 645 + a[i] = m; 646 + aM[j*(nRight+1)+i] = d; 647 + } 648 + } 649 + 650 + /* Compute the lowest-cost path back through the matrix */ 651 + i = nRight; 652 + j = nLeft; 653 + k = (nRight+1)*(nLeft+1)-1; 654 + while( i+j>0 ){ 655 + unsigned char c = aM[k--]; 656 + if( c==2 ){ 657 + assert( i>0 && j>0 ); 658 + i--; 659 + j--; 660 + }else if( c==3 ){ 661 + assert( i>0 ); 662 + i--; 663 + }else{ 664 + assert( j>0 ); 665 + j--; 666 + } 667 + aM[k] = aM[j*(nRight+1)+i]; 668 + } 669 + k++; 670 + i = (nRight+1)*(nLeft+1) - k; 671 + memmove(aM, &aM[k], i); 672 + 673 + /* Return the result */ 674 + fossil_free(pToFree); 675 + return aM; 676 +} 449 677 450 678 /* 451 679 ** Given a diff context in which the aEdit[] array has been filled 452 680 ** in, compute a side-by-side diff into pOut. 453 681 */ 454 682 static void sbsDiff( 455 683 DContext *p, /* The computed diff */ ................................................................................ 472 700 int skip; /* Number of lines to skip */ 473 701 SbsLine s; /* Output line buffer */ 474 702 475 703 s.zLine = fossil_malloc( 10*width + 100 ); 476 704 if( s.zLine==0 ) return; 477 705 s.width = width; 478 706 s.escHtml = escHtml; 707 + s.iStart = -1; 708 + s.iEnd = -1; 479 709 A = p->aFrom; 480 710 B = p->aTo; 481 711 R = p->aEdit; 482 712 mxr = p->nEdit; 483 713 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } 484 714 for(r=0; r<mxr; r += 3*nr){ 485 715 /* Figure out how many triples to show in a single block */ ................................................................................ 525 755 /* Show the initial common area */ 526 756 a += skip; 527 757 b += skip; 528 758 m = R[r] - skip; 529 759 for(j=0; j<m; j++){ 530 760 s.n = 0; 531 761 sbsWriteLineno(&s, a+j); 762 + s.iStart = s.iEnd = -1; 532 763 sbsWriteText(&s, &A[a+j], SBS_PAD); 533 764 sbsWrite(&s, " ", 3); 534 765 sbsWriteLineno(&s, b+j); 535 766 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 536 767 blob_append(pOut, s.zLine, s.n); 537 768 } 538 769 a += m; 539 770 b += m; 540 771 541 772 /* Show the differences */ 542 773 for(i=0; i<nr; i++){ 543 - ma = R[r+i*3+1]; 544 - mb = R[r+i*3+2]; 545 - m = ma<mb ? ma : mb; 546 - for(j=0; j<m; j++){ 547 - s.n = 0; 548 - sbsWriteLineno(&s, a+j); 549 - sbsWriteHtml(&s, "<span class=\"diffchng\">"); 550 - sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN); 551 - sbsWrite(&s, " | ", 3); 552 - sbsWriteLineno(&s, b+j); 553 - sbsWriteHtml(&s, "<span class=\"diffchng\">"); 554 - sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN); 555 - blob_append(pOut, s.zLine, s.n); 556 - } 557 - a += m; 558 - b += m; 559 - ma -= m; 560 - mb -= m; 561 - for(j=0; j<ma; j++){ 562 - s.n = 0; 563 - sbsWriteLineno(&s, a+j); 564 - sbsWriteHtml(&s, "<span class=\"diffrm\">"); 565 - sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN); 566 - sbsWrite(&s, " <\n", 3); 567 - blob_append(pOut, s.zLine, s.n); 774 + unsigned char *alignment; 775 + ma = R[r+i*3+1]; /* Lines on left but not on right */ 776 + mb = R[r+i*3+2]; /* Lines on right but not on left */ 777 + alignment = sbsAlignment(&A[a], ma, &B[b], mb); 778 + for(j=0; ma+mb>0; j++){ 779 + if( alignment[j]==1 ){ 780 + s.n = 0; 781 + sbsWriteLineno(&s, a); 782 + s.iStart = 0; 783 + s.zStart = "<span class=\"diffrm\">"; 784 + s.iEnd = s.width; 785 + sbsWriteText(&s, &A[a], SBS_PAD); 786 + sbsWrite(&s, " <\n", 3); 787 + blob_append(pOut, s.zLine, s.n); 788 + assert( ma>0 ); 789 + ma--; 790 + a++; 791 + }else if( alignment[j]==2 ){ 792 + s.n = 0; 793 + sbsWriteLineChange(&s, &A[a], a, &B[b], b); 794 + blob_append(pOut, s.zLine, s.n); 795 + assert( ma>0 && mb>0 ); 796 + ma--; 797 + mb--; 798 + a++; 799 + b++; 800 + }else{ 801 + s.n = 0; 802 + sbsWriteSpace(&s, width + 7); 803 + sbsWrite(&s, " > ", 3); 804 + sbsWriteLineno(&s, b); 805 + s.iStart = 0; 806 + s.zStart = "<span class=\"diffadd\">"; 807 + s.iEnd = s.width; 808 + sbsWriteText(&s, &B[b], SBS_NEWLINE); 809 + blob_append(pOut, s.zLine, s.n); 810 + assert( mb>0 ); 811 + mb--; 812 + b++; 813 + } 568 814 } 569 - a += ma; 570 - for(j=0; j<mb; j++){ 571 - s.n = 0; 572 - sbsWriteSpace(&s, width + 7); 573 - sbsWrite(&s, " > ", 3); 574 - sbsWriteLineno(&s, b+j); 575 - sbsWriteHtml(&s, "<span class=\"diffadd\">"); 576 - sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN); 577 - blob_append(pOut, s.zLine, s.n); 578 - } 579 - b += mb; 815 + fossil_free(alignment); 580 816 if( i<nr-1 ){ 581 817 m = R[r+i*3+3]; 582 818 for(j=0; j<m; j++){ 583 819 s.n = 0; 584 820 sbsWriteLineno(&s, a+j); 821 + s.iStart = s.iEnd = -1; 585 822 sbsWriteText(&s, &A[a+j], SBS_PAD); 586 823 sbsWrite(&s, " ", 3); 587 824 sbsWriteLineno(&s, b+j); 588 825 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 589 826 blob_append(pOut, s.zLine, s.n); 590 827 } 591 828 b += m; ................................................................................ 596 833 /* Show the final common area */ 597 834 assert( nr==i ); 598 835 m = R[r+nr*3]; 599 836 if( m>nContext ) m = nContext; 600 837 for(j=0; j<m; j++){ 601 838 s.n = 0; 602 839 sbsWriteLineno(&s, a+j); 840 + s.iStart = s.iEnd = -1; 603 841 sbsWriteText(&s, &A[a+j], SBS_PAD); 604 842 sbsWrite(&s, " ", 3); 605 843 sbsWriteLineno(&s, b+j); 606 844 sbsWriteText(&s, &B[b+j], SBS_NEWLINE); 607 845 blob_append(pOut, s.zLine, s.n); 608 846 } 609 847 } ................................................................................ 941 1179 lnTo--; 942 1180 p->aEdit[r]--; 943 1181 p->aEdit[r+3]++; 944 1182 cpy--; 945 1183 } 946 1184 947 1185 /* Shift insertions toward the end of the file */ 948 - while( p->aEdit[r+3]>0 && del==0 && ins>0 ){ 1186 + while( r+3<p->nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ 949 1187 DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ 950 1188 DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ 951 1189 if( same_dline(pTop, pBtm)==0 ) break; 952 1190 if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; 953 1191 lnFrom++; 954 1192 lnTo++; 955 1193 p->aEdit[r]++; ................................................................................ 967 1205 lnTo--; 968 1206 p->aEdit[r]--; 969 1207 p->aEdit[r+3]++; 970 1208 cpy--; 971 1209 } 972 1210 973 1211 /* Shift deletions toward the end of the file */ 974 - while( p->aEdit[r+3]>0 && del>0 && ins==0 ){ 1212 + while( r+3<p->nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ 975 1213 DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ 976 1214 DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ 977 1215 if( same_dline(pTop, pBtm)==0 ) break; 978 1216 if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; 979 1217 lnFrom++; 980 1218 lnTo++; 981 1219 p->aEdit[r]++;
Changes to src/style.c.
762 762 { "div.udiff", 763 763 "context diff display", 764 764 @ font-family: monospace; 765 765 @ white-space: pre; 766 766 }, 767 767 { "span.diffchng", 768 768 "changes in a diff", 769 - @ background-color: #ffffc8; 769 + @ background-color: #e0e0ff; 770 770 }, 771 771 { "span.diffadd", 772 772 "added code in a diff", 773 773 @ background-color: #e0ffe0; 774 774 }, 775 775 { "span.diffrm", 776 776 "deleted in a diff",