Changes On Branch diff-experimental
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch diff-experimental Excluding Merge-Ins

This is equivalent to a diff from 98cf5c33bc to 5d836cbda7

2012-02-06
15:21
Merge the diff enhancements from the diff-experimental branch into trunk. check-in: bba7aea8ca user: drh tags: trunk
15:02
Tweak to side-by-side alignment: Be more aggressive about marking lines as changed if they are naturally aligned to begin with. Closed-Leaf check-in: 5d836cbda7 user: drh tags: diff-experimental
14:28
Adjust the alignment similarity cutoff score. check-in: 9713e42356 user: drh tags: diff-experimental
01:55
Trying out a greedy algorithm for aligning the two sides of a change with side-by-side diff. This helps in some cases, but we could probably benefit from a better algorithm. check-in: 881b65141b user: drh tags: diff-experimental
2012-02-05
20:22
Add the "diff optimizer" which tries to shift inserts and deletes to align with natural boundaries in the text. The resulting diff is no more or less correct than the original; it just seems more natural to human readers. check-in: 98cf5c33bc user: drh tags: trunk
17:19
Rearrange code and edit comments in diff logic, for clarity of presentation. No functional changes. check-in: 032da543f0 user: drh tags: trunk

Changes to src/diff.c.

   353    353   ** Status of a single output line
   354    354   */
   355    355   typedef struct SbsLine SbsLine;
   356    356   struct SbsLine {
   357    357     char *zLine;             /* The output line under construction */
   358    358     int n;                   /* Index of next unused slot in the zLine[] */
   359    359     int width;               /* Maximum width of a column in the output */
   360         -  unsigned char escHtml;  /* True to escape html characters */
          360  +  unsigned char escHtml;   /* True to escape html characters */
          361  +  int iStart;              /* Write zStart prior to character iStart */
          362  +  const char *zStart;      /* A <span> tag */
          363  +  int iEnd;                /* Write </span> prior to character iEnd */
   361    364   };
   362    365   
   363    366   /*
   364    367   ** Flags for sbsWriteText()
   365    368   */
   366         -#define SBS_NEWLINE  0x0001   /* End with \n\000 */
   367         -#define SBS_PAD      0x0002   /* Pad output to width spaces */
   368         -#define SBS_ENDSPAN  0x0004   /* Write a </span> after text */
          369  +#define SBS_NEWLINE      0x0001   /* End with \n\000 */
          370  +#define SBS_PAD          0x0002   /* Pad output to width spaces */
   369    371   
   370    372   /*
   371    373   ** Write up to width characters of pLine into z[].  Translate tabs into
   372    374   ** spaces.  Add a newline if SBS_NEWLINE is set.  Translate HTML characters
   373    375   ** if SBS_HTML is set.  Pad the rendering out width bytes if SBS_PAD is set.
   374    376   */
   375    377   static void sbsWriteText(SbsLine *p, DLine *pLine, unsigned flags){
................................................................................
   378    380     int j;   /* Number of output characters generated */
   379    381     int k;   /* Cursor position */
   380    382     const char *zIn = pLine->z;
   381    383     char *z = &p->zLine[p->n];
   382    384     int w = p->width;
   383    385     for(i=j=k=0; k<w && i<n; i++, k++){
   384    386       char c = zIn[i];
          387  +    if( p->escHtml ){
          388  +      if( i==p->iStart ){
          389  +        int x = strlen(p->zStart);
          390  +        memcpy(z+j, p->zStart, x);
          391  +        j += x;
          392  +      }else if( i==p->iEnd ){
          393  +        memcpy(z+j, "</span>", 7);
          394  +        j += 7;
          395  +      }
          396  +    }
   385    397       if( c=='\t' ){
   386    398         z[j++] = ' ';
   387    399         while( (k&7)!=7 && k<w ){ z[j++] = ' '; k++; }
   388    400       }else if( c=='\r' || c=='\f' ){
   389    401         z[j++] = ' ';
   390    402       }else if( c=='<' && p->escHtml ){
   391    403         memcpy(&z[j], "&lt;", 4);
................................................................................
   396    408       }else if( c=='>' && p->escHtml ){
   397    409         memcpy(&z[j], "&gt;", 4);
   398    410         j += 4;
   399    411       }else{
   400    412         z[j++] = c;
   401    413       }
   402    414     }
   403         -  if( (flags & SBS_ENDSPAN) && p->escHtml ){
          415  +  if( p->escHtml && i<=p->iEnd ){
   404    416       memcpy(&z[j], "</span>", 7);
   405    417       j += 7;
   406    418     }
   407    419     if( (flags & SBS_PAD)!=0 ){
   408    420       while( k<w ){ k++;  z[j++] = ' '; }
   409    421     }
   410    422     if( flags & SBS_NEWLINE ){
................................................................................
   442    454     sbsWriteHtml(p, "<span class=\"diffln\">");
   443    455     sqlite3_snprintf(7, &p->zLine[p->n], "%5d ", ln+1);
   444    456     p->n += 6;
   445    457     sbsWriteHtml(p, "</span>");
   446    458     p->zLine[p->n++] = ' ';
   447    459   }
   448    460   
          461  +/*
          462  +** Write out lines that have been edited.  Adjust the highlight to cover
          463  +** only those parts of the line that actually changed.
          464  +*/
          465  +static void sbsWriteLineChange(
          466  +  SbsLine *p,          /* The SBS output line */
          467  +  DLine *pLeft,        /* Left line of the change */
          468  +  int lnLeft,          /* Line number for the left line */
          469  +  DLine *pRight,       /* Right line of the change */
          470  +  int lnRight          /* Line number of the right line */
          471  +){
          472  +  int nLeft;           /* Length of left line in bytes */
          473  +  int nRight;          /* Length of right line in bytes */
          474  +  int nPrefix;         /* Length of common prefix */
          475  +  int nSuffix;         /* Length of common suffix */
          476  +  int width;           /* Total column width */
          477  +  const char *zLeft;   /* Text of the left line */
          478  +  const char *zRight;  /* Text of the right line */
          479  +
          480  +  nLeft = pLeft->h & LENGTH_MASK;
          481  +  zLeft = pLeft->z;
          482  +  nRight = pRight->h & LENGTH_MASK;
          483  +  zRight = pRight->z;
          484  +
          485  +  nPrefix = 0;
          486  +  while( nPrefix<nLeft && nPrefix<nRight && zLeft[nPrefix]==zRight[nPrefix] ){
          487  +    nPrefix++;
          488  +  }
          489  +  nSuffix = 0;
          490  +  if( nPrefix<nLeft && nPrefix<nRight ){
          491  +    while( nSuffix<nLeft && nSuffix<nRight
          492  +           && zLeft[nLeft-nSuffix-1]==zRight[nRight-nSuffix-1] ){
          493  +      nSuffix++;
          494  +    }
          495  +    if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0;
          496  +  }
          497  +  if( nPrefix+nSuffix > nLeft ) nSuffix = nLeft - nPrefix;
          498  +  if( nPrefix+nSuffix > nRight ) nSuffix = nRight - nPrefix;
          499  +  if( nPrefix+nSuffix==nLeft ){
          500  +    /* Text inserted on the right */
          501  +    sbsWriteLineno(p, lnLeft);
          502  +    p->iStart = p->iEnd = -1;
          503  +    sbsWriteText(p, pLeft, SBS_PAD);
          504  +    sbsWrite(p, " | ", 3);
          505  +    sbsWriteLineno(p, lnRight);
          506  +    p->iStart = nPrefix;
          507  +    p->iEnd = nRight - nSuffix;
          508  +    p->zStart = "<span class=\"diffadd\">";
          509  +    sbsWriteText(p, pRight, SBS_NEWLINE);
          510  +  }else if( nPrefix+nSuffix==nRight ){
          511  +    /* Text deleted from the left */
          512  +    sbsWriteLineno(p, lnLeft);
          513  +    p->iStart = nPrefix;
          514  +    p->iEnd = nLeft - nSuffix;
          515  +    p->zStart = "<span class=\"diffrm\">";
          516  +    sbsWriteText(p, pLeft, SBS_PAD);
          517  +    sbsWrite(p, " | ", 3);
          518  +    sbsWriteLineno(p, lnRight);
          519  +    p->iStart = p->iEnd = -1;
          520  +    sbsWriteText(p, pRight, SBS_NEWLINE);
          521  +  }else{
          522  +    /* Text modified between left and right */
          523  +    sbsWriteLineno(p, lnLeft);
          524  +    p->iStart = nPrefix;
          525  +    p->iEnd = nLeft - nSuffix;
          526  +    p->zStart = "<span class=\"diffchng\">";
          527  +    sbsWriteText(p, pLeft, SBS_PAD);
          528  +    sbsWrite(p, " | ", 3);
          529  +    sbsWriteLineno(p, lnRight);
          530  +    p->iEnd = nRight - nSuffix;
          531  +    sbsWriteText(p, pRight, SBS_NEWLINE);
          532  +  }
          533  +}
          534  +
          535  +/*
          536  +** Return the number between 0 and 100 that is smaller the closer pA and
          537  +** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
          538  +** completely different.
          539  +*/
          540  +static int match_dline(DLine *pA, DLine *pB){
          541  +  const char *zA;
          542  +  const char *zB;
          543  +  int nA;
          544  +  int nB;
          545  +  int minDist;
          546  +  int i;
          547  +  int nMatch;
          548  +  int score;
          549  +
          550  +  zA = pA->z;
          551  +  zB = pB->z;
          552  +  nA = pA->h & LENGTH_MASK;
          553  +  nB = pB->h & LENGTH_MASK;
          554  +  while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
          555  +  while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
          556  +  while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
          557  +  while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
          558  +  minDist = nA;
          559  +  if( minDist>nB ) minDist = nB;
          560  +  if( minDist==0 ){
          561  +    score = 100;
          562  +  }else{
          563  +    for(i=0; i<nA && i<nB && zA[i]==zB[i]; i++){}
          564  +    nMatch = i;
          565  +    for(i=1; i<=nA && i<=nB && zA[nA-i]==zB[nB-i]; i++){}
          566  +    nMatch += i-1;
          567  +    score = (nMatch >= minDist) ? 0 : ((minDist - nMatch)*100)/minDist;
          568  +  }
          569  +  return score;
          570  +}
          571  +
          572  +/*
          573  +** There is a change block in which nLeft lines of text on the left are
          574  +** converted into nRight lines of text on the right.  This routine computes
          575  +** how the lines on the left line up with the lines on the right.
          576  +**
          577  +** The return value is a buffer of unsigned characters, obtained from
          578  +** fossil_malloc().  (The caller needs to free the return value using
          579  +** fossil_free().)  Entries in the returned array have values as follows:
          580  +**
          581  +**    1.   Delete the next line of pLeft.
          582  +**    2.   The next line of pLeft changes into the next line of pRight.
          583  +**    3.   Insert the next line of pRight.
          584  +**
          585  +** The length of the returned array will be just large enough to cause
          586  +** all elements of pLeft and pRight to be consumed.
          587  +**
          588  +** Algorithm:  Wagner's minimum edit-distance algorithm, modified by
          589  +** adding a cost to each match based on how well the two rows match
          590  +** each other.  Insertion and deletion costs are 50.  Match costs
          591  +** are between 0 and 100 where 0 is a perfect match 100 is a complete
          592  +** mismatch.
          593  +*/
          594  +static unsigned char *sbsAlignment(
          595  +   DLine *aLeft, int nLeft,       /* Text on the left */
          596  +   DLine *aRight, int nRight      /* Text on the right */
          597  +){
          598  +  int i, j, k;                 /* Loop counters */
          599  +  int *a;                      /* One row of the Wagner matrix */
          600  +  int *pToFree;                /* Space that needs to be freed */
          601  +  unsigned char *aM;           /* Wagner result matrix */
          602  +  int aBuf[100];               /* Stack space for a[] if nRight not to big */
          603  +
          604  +  aM = fossil_malloc( (nLeft+1)*(nRight+1) );
          605  +  if( nLeft==0 ){
          606  +    memset(aM, 3, nRight);
          607  +    return aM;
          608  +  }
          609  +  if( nRight==0 ){
          610  +    memset(aM, 1, nLeft);
          611  +    return aM;
          612  +  }
          613  +  if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){
          614  +    pToFree = 0;
          615  +    a = aBuf;
          616  +  }else{
          617  +    a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
          618  +  }
          619  +
          620  +  /* Compute the best alignment */
          621  +  for(i=0; i<=nRight; i++){
          622  +    aM[i] = 3;
          623  +    a[i] = i*50;
          624  +  }
          625  +  aM[0] = 0;
          626  +  for(j=1; j<=nLeft; j++){
          627  +    int p = a[0];
          628  +    a[0] = p+50;
          629  +    aM[j*(nRight+1)] = 1;
          630  +    for(i=1; i<=nRight; i++){
          631  +      int m = a[i-1]+50;
          632  +      int d = 3;
          633  +      if( m>a[i]+50 ){
          634  +        m = a[i]+50;
          635  +        d = 1;
          636  +      }
          637  +      if( m>p ){
          638  +        int score = match_dline(&aLeft[j-1], &aRight[i-1]);
          639  +        if( (score<66 || (i<j+1 && i>j-1)) && m>p+score ){
          640  +          m = p+score;
          641  +          d = 2;
          642  +        }
          643  +      }
          644  +      p = a[i];
          645  +      a[i] = m;
          646  +      aM[j*(nRight+1)+i] = d;
          647  +    }
          648  +  }
          649  +
          650  +  /* Compute the lowest-cost path back through the matrix */
          651  +  i = nRight;
          652  +  j = nLeft;
          653  +  k = (nRight+1)*(nLeft+1)-1;
          654  +  while( i+j>0 ){
          655  +    unsigned char c = aM[k--];
          656  +    if( c==2 ){
          657  +      assert( i>0 && j>0 );
          658  +      i--;
          659  +      j--;
          660  +    }else if( c==3 ){
          661  +      assert( i>0 );
          662  +      i--;
          663  +    }else{
          664  +      assert( j>0 );
          665  +      j--;
          666  +    }
          667  +    aM[k] = aM[j*(nRight+1)+i];
          668  +  }
          669  +  k++;
          670  +  i = (nRight+1)*(nLeft+1) - k;
          671  +  memmove(aM, &aM[k], i);
          672  +
          673  +  /* Return the result */
          674  +  fossil_free(pToFree);
          675  +  return aM;
          676  +}
   449    677   
   450    678   /*
   451    679   ** Given a diff context in which the aEdit[] array has been filled
   452    680   ** in, compute a side-by-side diff into pOut.
   453    681   */
   454    682   static void sbsDiff(
   455    683     DContext *p,       /* The computed diff */
................................................................................
   472    700     int skip;     /* Number of lines to skip */
   473    701     SbsLine s;    /* Output line buffer */
   474    702   
   475    703     s.zLine = fossil_malloc( 10*width + 100 );
   476    704     if( s.zLine==0 ) return;
   477    705     s.width = width;
   478    706     s.escHtml = escHtml;
          707  +  s.iStart = -1;
          708  +  s.iEnd = -1;
   479    709     A = p->aFrom;
   480    710     B = p->aTo;
   481    711     R = p->aEdit;
   482    712     mxr = p->nEdit;
   483    713     while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
   484    714     for(r=0; r<mxr; r += 3*nr){
   485    715       /* Figure out how many triples to show in a single block */
................................................................................
   525    755       /* Show the initial common area */
   526    756       a += skip;
   527    757       b += skip;
   528    758       m = R[r] - skip;
   529    759       for(j=0; j<m; j++){
   530    760         s.n = 0;
   531    761         sbsWriteLineno(&s, a+j);
          762  +      s.iStart = s.iEnd = -1;
   532    763         sbsWriteText(&s, &A[a+j], SBS_PAD);
   533    764         sbsWrite(&s, "   ", 3);
   534    765         sbsWriteLineno(&s, b+j);
   535    766         sbsWriteText(&s, &B[b+j], SBS_NEWLINE);
   536    767         blob_append(pOut, s.zLine, s.n);
   537    768       }
   538    769       a += m;
   539    770       b += m;
   540    771   
   541    772       /* Show the differences */
   542    773       for(i=0; i<nr; i++){
   543         -      ma = R[r+i*3+1];
   544         -      mb = R[r+i*3+2];
   545         -      m = ma<mb ? ma : mb;
   546         -      for(j=0; j<m; j++){
   547         -        s.n = 0;
   548         -        sbsWriteLineno(&s, a+j);
   549         -        sbsWriteHtml(&s, "<span class=\"diffchng\">");
   550         -        sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
   551         -        sbsWrite(&s, " | ", 3);
   552         -        sbsWriteLineno(&s, b+j);
   553         -        sbsWriteHtml(&s, "<span class=\"diffchng\">");
   554         -        sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
   555         -        blob_append(pOut, s.zLine, s.n);
   556         -      }
   557         -      a += m;
   558         -      b += m;
   559         -      ma -= m;
   560         -      mb -= m;
   561         -      for(j=0; j<ma; j++){
   562         -        s.n = 0;
   563         -        sbsWriteLineno(&s, a+j);
   564         -        sbsWriteHtml(&s, "<span class=\"diffrm\">");
   565         -        sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
   566         -        sbsWrite(&s, " <\n", 3);
   567         -        blob_append(pOut, s.zLine, s.n);
          774  +      unsigned char *alignment;
          775  +      ma = R[r+i*3+1];   /* Lines on left but not on right */
          776  +      mb = R[r+i*3+2];   /* Lines on right but not on left */
          777  +      alignment = sbsAlignment(&A[a], ma, &B[b], mb);
          778  +      for(j=0; ma+mb>0; j++){
          779  +        if( alignment[j]==1 ){
          780  +          s.n = 0;
          781  +          sbsWriteLineno(&s, a);
          782  +          s.iStart = 0;
          783  +          s.zStart = "<span class=\"diffrm\">";
          784  +          s.iEnd = s.width;
          785  +          sbsWriteText(&s, &A[a], SBS_PAD);
          786  +          sbsWrite(&s, " <\n", 3);
          787  +          blob_append(pOut, s.zLine, s.n);
          788  +          assert( ma>0 );
          789  +          ma--;
          790  +          a++;
          791  +        }else if( alignment[j]==2 ){
          792  +          s.n = 0;
          793  +          sbsWriteLineChange(&s, &A[a], a, &B[b], b);
          794  +          blob_append(pOut, s.zLine, s.n);
          795  +          assert( ma>0 && mb>0 );
          796  +          ma--;
          797  +          mb--;
          798  +          a++;
          799  +          b++;
          800  +        }else{
          801  +          s.n = 0;
          802  +          sbsWriteSpace(&s, width + 7);
          803  +          sbsWrite(&s, " > ", 3);
          804  +          sbsWriteLineno(&s, b);
          805  +          s.iStart = 0;
          806  +          s.zStart = "<span class=\"diffadd\">";
          807  +          s.iEnd = s.width;
          808  +          sbsWriteText(&s, &B[b], SBS_NEWLINE);
          809  +          blob_append(pOut, s.zLine, s.n);
          810  +          assert( mb>0 );
          811  +          mb--;
          812  +          b++;
          813  +        }
   568    814         }
   569         -      a += ma;
   570         -      for(j=0; j<mb; j++){
   571         -        s.n = 0;
   572         -        sbsWriteSpace(&s, width + 7);
   573         -        sbsWrite(&s, " > ", 3);
   574         -        sbsWriteLineno(&s, b+j);
   575         -        sbsWriteHtml(&s, "<span class=\"diffadd\">");
   576         -        sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
   577         -        blob_append(pOut, s.zLine, s.n);
   578         -      }
   579         -      b += mb;
          815  +      fossil_free(alignment);
   580    816         if( i<nr-1 ){
   581    817           m = R[r+i*3+3];
   582    818           for(j=0; j<m; j++){
   583    819             s.n = 0;
   584    820             sbsWriteLineno(&s, a+j);
          821  +          s.iStart = s.iEnd = -1;
   585    822             sbsWriteText(&s, &A[a+j], SBS_PAD);
   586    823             sbsWrite(&s, "   ", 3);
   587    824             sbsWriteLineno(&s, b+j);
   588    825             sbsWriteText(&s, &B[b+j], SBS_NEWLINE);
   589    826             blob_append(pOut, s.zLine, s.n);
   590    827           }
   591    828           b += m;
................................................................................
   596    833       /* Show the final common area */
   597    834       assert( nr==i );
   598    835       m = R[r+nr*3];
   599    836       if( m>nContext ) m = nContext;
   600    837       for(j=0; j<m; j++){
   601    838         s.n = 0;
   602    839         sbsWriteLineno(&s, a+j);
          840  +      s.iStart = s.iEnd = -1;
   603    841         sbsWriteText(&s, &A[a+j], SBS_PAD);
   604    842         sbsWrite(&s, "   ", 3);
   605    843         sbsWriteLineno(&s, b+j);
   606    844         sbsWriteText(&s, &B[b+j], SBS_NEWLINE);
   607    845         blob_append(pOut, s.zLine, s.n);
   608    846       }
   609    847     }
................................................................................
   941   1179         lnTo--;
   942   1180         p->aEdit[r]--;
   943   1181         p->aEdit[r+3]++;
   944   1182         cpy--;
   945   1183       }
   946   1184   
   947   1185       /* Shift insertions toward the end of the file */
   948         -    while( p->aEdit[r+3]>0 && del==0 && ins>0 ){
         1186  +    while( r+3<p->nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){
   949   1187         DLine *pTop = &p->aTo[lnTo];       /* First line inserted */
   950   1188         DLine *pBtm = &p->aTo[lnTo+ins];   /* First line past end of insert */
   951   1189         if( same_dline(pTop, pBtm)==0 ) break;
   952   1190         if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break;
   953   1191         lnFrom++;
   954   1192         lnTo++;
   955   1193         p->aEdit[r]++;
................................................................................
   967   1205         lnTo--;
   968   1206         p->aEdit[r]--;
   969   1207         p->aEdit[r+3]++;
   970   1208         cpy--;
   971   1209       }
   972   1210   
   973   1211       /* Shift deletions toward the end of the file */
   974         -    while( p->aEdit[r+3]>0 && del>0 && ins==0 ){
         1212  +    while( r+3<p->nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){
   975   1213         DLine *pTop = &p->aFrom[lnFrom];     /* First line deleted */
   976   1214         DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */
   977   1215         if( same_dline(pTop, pBtm)==0 ) break;
   978   1216         if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break;
   979   1217         lnFrom++;
   980   1218         lnTo++;
   981   1219         p->aEdit[r]++;

Changes to src/style.c.

   762    762     { "div.udiff",
   763    763       "context diff display",
   764    764       @   font-family: monospace;
   765    765       @   white-space: pre;
   766    766     },
   767    767     { "span.diffchng",
   768    768       "changes in a diff",
   769         -    @   background-color: #ffffc8;
          769  +    @   background-color: #e0e0ff;
   770    770     },
   771    771     { "span.diffadd",
   772    772       "added code in a diff",
   773    773       @   background-color: #e0ffe0;
   774    774     },
   775    775     { "span.diffrm",
   776    776       "deleted in a diff",