Changes On Branch commitWarning
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch commitWarning Excluding Merge-Ins

This is equivalent to a diff from 70b4f105eb to d57f0a9361

2012-11-01
03:44
Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch. check-in: 6182584217 user: mistachkin tags: trunk
2012-10-30
10:23
Fix a typo in a comment. check-in: 1167d7b145 user: drh tags: trunk
08:06
take over various stylistic adjustments check-in: ab5a6b436e user: jan.nijtmans tags: use-blob_strip_bom
02:17
Refactor commit warning functionality. Break out UTF-16 BOM detection into a new function. Style and comment fixes. Closed-Leaf check-in: d57f0a9361 user: mistachkin tags: commitWarning
2012-10-29
15:57
(experiment) Allow 'c' for converting unicode/crlf files at user request, not complete yet. check-in: 121394b04f user: jan.nijtmans tags: use-blob_strip_bom
14:45
Allow Unicode files for Wiki-rendering on Windows. Trying to commit a file with a UTF-16 BOM will now give a warning, just as for a file containing CR/LF line endings. The --args command line option now accepts UTF-16 files as well. check-in: 70b4f105eb user: jan.nijtmans tags: trunk
14:35
Fix the "possible unresolved merge conflict" warning message to show the pathname relative to the working directory instead of the root. check-in: 2a15d87edb user: jan.nijtmans tags: trunk
14:04
Fix handling of the "crlf-glob" option: UTF-16 BOMs always give a warning now. Closed-Leaf check-in: 9267dba498 user: jan.nijtmans tags: use-blob_strip_bom

Changes to src/checkin.c.

   880    880     md5sum_blob(pOut, &mcksum);
   881    881     blob_appendf(pOut, "Z %b\n", &mcksum);
   882    882     if( pnFBcard ) *pnFBcard = nFBcard;
   883    883   }
   884    884   
   885    885   /*
   886    886   ** Issue a warning and give the user an opportunity to abandon out
   887         -** if unicode or a \r\n line ending is seen in a text file.
          887  +** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
          888  +** is seen in a text file.
   888    889   */
   889         -static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){
   890         -  int looksLike;          /* return value of looks_like_text() */
          890  +static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
          891  +  int eType;              /* return value of looks_like_text() */
          892  +  int fUnicode;           /* return value of starts_with_utf16_bom() */
   891    893     char *zMsg;             /* Warning message */
   892    894     Blob fname;             /* Relative pathname of the file */
   893    895     static int allOk = 0;   /* Set to true to disable this routine */
   894    896   
   895    897     if( allOk ) return;
   896         -  looksLike = looks_like_text(p);
   897         -  if( looksLike<0 ){
   898         -    const char *type;
          898  +  eType = looks_like_text(p);
          899  +  fUnicode = starts_with_utf16_bom(p);
          900  +  if( eType==-1 || fUnicode ){
          901  +    const char *zWarning;
   899    902       Blob ans;
   900    903       char cReply;
   901    904   
   902         -    if( looksLike&1 ){
          905  +    if( eType==-1 && fUnicode ){
          906  +      zWarning = "Unicode and CR/NL line endings";
          907  +    }else if( eType==-1 ){
   903    908         if( crnlOk ){
   904         -        return; /* We don't want CrLf warnings for this file. */
          909  +        return; /* We don't want CR/NL warnings for this file. */
   905    910         }
   906         -      type = "CR/NL line endings";
          911  +      zWarning = "CR/NL line endings";
   907    912       }else{
   908         -      type = "unicode";
          913  +      zWarning = "Unicode";
   909    914       }
   910    915       file_relative_name(zFilename, &fname, 0);
   911    916       blob_zero(&ans);
   912    917       zMsg = mprintf(
   913    918            "%s contains %s; commit anyhow (a=all/y/N)?",
   914         -         blob_str(&fname), type);
          919  +         blob_str(&fname), zWarning);
   915    920       prompt_user(zMsg, &ans);
   916    921       fossil_free(zMsg);
   917    922       cReply = blob_str(&ans)[0];
   918    923       if( cReply=='a' || cReply=='A' ){
   919    924         allOk = 1;
   920    925       }else if( cReply!='y' && cReply!='Y' ){
   921    926         fossil_fatal("Abandoning commit due to %s in %s",
   922         -                   type, blob_str(&fname));
          927  +                   zWarning, blob_str(&fname));
   923    928       }
   924    929       blob_reset(&ans);
   925    930       blob_reset(&fname);
   926    931     }
   927    932   }
   928    933   
   929    934   /*
................................................................................
  1230   1235       blob_zero(&content);
  1231   1236       if( file_wd_islink(zFullname) ){
  1232   1237         /* Instead of file content, put link destination path */
  1233   1238         blob_read_link(&content, zFullname);
  1234   1239       }else{
  1235   1240         blob_read_from_file(&content, zFullname);
  1236   1241       }
  1237         -    encoding_warning(&content, crnlOk, zFullname);
         1242  +    commit_warning(&content, crnlOk, zFullname);
  1238   1243       if( chnged==1 && contains_merge_marker(&content) ){
  1239   1244         Blob fname; /* Relative pathname of the file */
  1240   1245   
  1241   1246         nConflict++;
  1242   1247         file_relative_name(zFullname, &fname, 0);
  1243   1248         fossil_print("possible unresolved merge conflict in %s\n",
  1244   1249                      blob_str(&fname));

Changes to src/diff.c.

    46     46   */
    47     47   #define DIFF_CANNOT_COMPUTE_BINARY \
    48     48       "cannot compute difference between binary files\n"
    49     49   
    50     50   #define DIFF_CANNOT_COMPUTE_SYMLINK \
    51     51       "cannot compute difference between symlink and regular file\n"
    52     52   
    53         -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
           53  +#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
    54     54   #endif /* INTERFACE */
    55     55   
    56     56   /*
    57     57   ** Maximum length of a line in a text file.  (8192)
    58     58   */
    59     59   #define LENGTH_MASK_SZ  13
    60     60   #define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)
................................................................................
   168    168   
   169    169     /* Return results */
   170    170     *pnLine = nLine;
   171    171     return a;
   172    172   }
   173    173   
   174    174   /*
   175         -** Returns 1, if everything OK
   176         -** Returns 0 if the specified content appears to be binary or
   177         -** contains a line that is too long
   178         -** Returns -1, if the file appears text, but it contains CrLf
   179         -** Returns -2, if the file starts with an UTF-16 BOM (le or be)
          175  +** This function attempts to scan each logical line within the blob to
          176  +** determine the type of content it appears to contain.  Possible return
          177  +** values are:
          178  +**
          179  +**  (1) -- The content appears to consist entirely of text, with lines
          180  +**         delimited by line-feed characters; however, the encoding may
          181  +**         not be UTF-8.
          182  +**
          183  +**  (0) -- The content appears to be binary because it contains embedded
          184  +**         NUL (\000) characters or an extremely long line.  Since this
          185  +**         function does not understand UTF-16, it may falsely consider
          186  +**         UTF-16 text to be binary.
          187  +**
          188  +** (-1) -- The content appears to consist entirely of text, with lines
          189  +**         delimited by carriage-return, line-feed pairs; however, the
          190  +**         encoding may not be UTF-8.
          191  +**
   180    192   */
   181    193   int looks_like_text(const Blob *pContent){
   182    194     const char *z = blob_buffer(pContent);
   183    195     unsigned int n = blob_size(pContent);
   184    196     int j, c;
   185         -  int result = 1;  /* Assume text with no CrLf */
          197  +  int result = 1;  /* Assume text with no CR/NL */
   186    198   
   187    199     /* Check individual lines.
   188    200     */
   189    201     if( n==0 ) return result;  /* Empty file -> text */
   190    202     c = *z;
   191    203     if( c==0 ) return 0;  /* \000 byte in a file -> binary */
   192         -  if ( n > 1 ){
   193         -    if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
   194         -      return -2;
   195         -    } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
   196         -      return -2;
   197         -    }
   198         -  }
   199    204     j = (c!='\n');
   200    205     while( --n>0 ){
   201    206       c = *++z; ++j;
   202    207       if( c==0 ) return 0;  /* \000 byte in a file -> binary */
   203    208       if( c=='\n' ){
   204    209         if( z[-1]=='\r' ){
   205         -        result = -1;  /* Contains CrLf, continue */
          210  +        result = -1;  /* Contains CR/NL, continue */
   206    211         }
   207    212         if( j>LENGTH_MASK ){
   208    213           return 0;  /* Very long line -> binary */
   209    214         }
   210    215         j = 0;
   211    216       }
   212    217     }
   213    218     if( j>LENGTH_MASK ){
   214    219       return 0;  /* Very long line -> binary */
   215    220     }
   216    221     return result;  /* No problems seen -> not binary */
   217    222   }
          223  +
          224  +/*
          225  +** This function returns non-zero if the blob starts with a UTF-16le
          226  +** (0xff 0xfe) or UTF-16be (0xfe 0xff) byte-order-mark (BOM).
          227  +*/
          228  +int starts_with_utf16_bom(const Blob *pContent){
          229  +  const char *z = blob_buffer(pContent);
          230  +  int c1, c2;
          231  +
          232  +  if( blob_size(pContent)<2 ) return 0;  /* Too short to hold a BOM */
          233  +  c1 = z[0]; c2 = z[1];
          234  +  if( (c1==(char)0xff) && (c2==(char)0xfe) ){
          235  +    return 1;  /* UTF-16le BOM */
          236  +  }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
          237  +    return 1;  /* UTF-16be BOM */
          238  +  }
          239  +  return 0;
          240  +}
   218    241   
   219    242   /*
   220    243   ** Return true if two DLine elements are identical.
   221    244   */
   222    245   static int same_dline(DLine *pA, DLine *pB){
   223    246     return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
   224    247   }