Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch commitWarning Excluding Merge-Ins
This is equivalent to a diff from 70b4f105eb to d57f0a9361
2012-11-01
| ||
03:44 | Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch. check-in: 6182584217 user: mistachkin tags: trunk | |
2012-10-30
| ||
10:23 | Fix a typo in a comment. check-in: 1167d7b145 user: drh tags: trunk | |
08:06 | take over various stylistic adjustments check-in: ab5a6b436e user: jan.nijtmans tags: use-blob_strip_bom | |
02:17 | Refactor commit warning functionality. Break out UTF-16 BOM detection into a new function. Style and comment fixes. Closed-Leaf check-in: d57f0a9361 user: mistachkin tags: commitWarning | |
2012-10-29
| ||
15:57 | (experiment) Allow 'c' for converting unicode/crlf files at user request, not complete yet. check-in: 121394b04f user: jan.nijtmans tags: use-blob_strip_bom | |
14:45 | Allow unicode files for Wiki-rendering on Windows. Trying to commit a file with a UTF-16 BOM will now give a warning, just as for a file containing crlf. The --args command line option now accepts UTF-16 files as well. check-in: 70b4f105eb user: jan.nijtmans tags: trunk | |
14:35 | fix "possible unresolved merge conflict" warning message to show the pathname relative to the wd instead of the root check-in: 2a15d87edb user: jan.nijtmans tags: trunk | |
14:04 | fix handling of "crlf-glob" option: UTF-16 BOMs always give a warning now Closed-Leaf check-in: 9267dba498 user: jan.nijtmans tags: use-blob_strip_bom | |
Changes to src/checkin.c.
880 880 md5sum_blob(pOut, &mcksum); 881 881 blob_appendf(pOut, "Z %b\n", &mcksum); 882 882 if( pnFBcard ) *pnFBcard = nFBcard; 883 883 } 884 884 885 885 /* 886 886 ** Issue a warning and give the user an opportunity to abandon out 887 -** if unicode or a \r\n line ending is seen in a text file. 887 +** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending 888 +** is seen in a text file. 888 889 */ 889 -static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){ 890 - int looksLike; /* return value of looks_like_text() */ 890 +static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ 891 + int eType; /* return value of looks_like_text() */ 892 + int fUnicode; /* return value of starts_with_utf16_bom() */ 891 893 char *zMsg; /* Warning message */ 892 894 Blob fname; /* Relative pathname of the file */ 893 895 static int allOk = 0; /* Set to true to disable this routine */ 894 896 895 897 if( allOk ) return; 896 - looksLike = looks_like_text(p); 897 - if( looksLike<0 ){ 898 - const char *type; 898 + eType = looks_like_text(p); 899 + fUnicode = starts_with_utf16_bom(p); 900 + if( eType==-1 || fUnicode ){ 901 + const char *zWarning; 899 902 Blob ans; 900 903 char cReply; 901 904 902 - if( looksLike&1 ){ 905 + if( eType==-1 && fUnicode ){ 906 + zWarning = "Unicode and CR/NL line endings"; 907 + }else if( eType==-1 ){ 903 908 if( crnlOk ){ 904 - return; /* We don't want CrLf warnings for this file. */ 909 + return; /* We don't want CR/NL warnings for this file. 
*/ 905 910 } 906 - type = "CR/NL line endings"; 911 + zWarning = "CR/NL line endings"; 907 912 }else{ 908 - type = "unicode"; 913 + zWarning = "Unicode"; 909 914 } 910 915 file_relative_name(zFilename, &fname, 0); 911 916 blob_zero(&ans); 912 917 zMsg = mprintf( 913 918 "%s contains %s; commit anyhow (a=all/y/N)?", 914 - blob_str(&fname), type); 919 + blob_str(&fname), zWarning); 915 920 prompt_user(zMsg, &ans); 916 921 fossil_free(zMsg); 917 922 cReply = blob_str(&ans)[0]; 918 923 if( cReply=='a' || cReply=='A' ){ 919 924 allOk = 1; 920 925 }else if( cReply!='y' && cReply!='Y' ){ 921 926 fossil_fatal("Abandoning commit due to %s in %s", 922 - type, blob_str(&fname)); 927 + zWarning, blob_str(&fname)); 923 928 } 924 929 blob_reset(&ans); 925 930 blob_reset(&fname); 926 931 } 927 932 } 928 933 929 934 /* ................................................................................ 1230 1235 blob_zero(&content); 1231 1236 if( file_wd_islink(zFullname) ){ 1232 1237 /* Instead of file content, put link destination path */ 1233 1238 blob_read_link(&content, zFullname); 1234 1239 }else{ 1235 1240 blob_read_from_file(&content, zFullname); 1236 1241 } 1237 - encoding_warning(&content, crnlOk, zFullname); 1242 + commit_warning(&content, crnlOk, zFullname); 1238 1243 if( chnged==1 && contains_merge_marker(&content) ){ 1239 1244 Blob fname; /* Relative pathname of the file */ 1240 1245 1241 1246 nConflict++; 1242 1247 file_relative_name(zFullname, &fname, 0); 1243 1248 fossil_print("possible unresolved merge conflict in %s\n", 1244 1249 blob_str(&fname));
Changes to src/diff.c.
46 46 */ 47 47 #define DIFF_CANNOT_COMPUTE_BINARY \ 48 48 "cannot compute difference between binary files\n" 49 49 50 50 #define DIFF_CANNOT_COMPUTE_SYMLINK \ 51 51 "cannot compute difference between symlink and regular file\n" 52 52 53 -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) 53 +#define looks_like_binary(blob) (looks_like_text((blob)) == 0) 54 54 #endif /* INTERFACE */ 55 55 56 56 /* 57 57 ** Maximum length of a line in a text file. (8192) 58 58 */ 59 59 #define LENGTH_MASK_SZ 13 60 60 #define LENGTH_MASK ((1<<LENGTH_MASK_SZ)-1) ................................................................................ 168 168 169 169 /* Return results */ 170 170 *pnLine = nLine; 171 171 return a; 172 172 } 173 173 174 174 /* 175 -** Returns 1, if everything OK 176 -** Returns 0 if the specified content appears to be binary or 177 -** contains a line that is too long 178 -** Returns -1, if the file appears text, but it contains CrLf 179 -** Returns -2, if the file starts with an UTF-16 BOM (le or be) 175 +** This function attempts to scan each logical line within the blob to 176 +** determine the type of content it appears to contain. Possible return 177 +** values are: 178 +** 179 +** (1) -- The content appears to consist entirely of text, with lines 180 +** delimited by line-feed characters; however, the encoding may 181 +** not be UTF-8. 182 +** 183 +** (0) -- The content appears to be binary because it contains embedded 184 +** NUL (\000) characters or an extremely long line. Since this 185 +** function does not understand UTF-16, it may falsely consider 186 +** UTF-16 text to be binary. 187 +** 188 +** (-1) -- The content appears to consist entirely of text, with lines 189 +** delimited by carriage-return, line-feed pairs; however, the 190 +** encoding may not be UTF-8. 
191 +** 180 192 */ 181 193 int looks_like_text(const Blob *pContent){ 182 194 const char *z = blob_buffer(pContent); 183 195 unsigned int n = blob_size(pContent); 184 196 int j, c; 185 - int result = 1; /* Assume text with no CrLf */ 197 + int result = 1; /* Assume text with no CR/NL */ 186 198 187 199 /* Check individual lines. 188 200 */ 189 201 if( n==0 ) return result; /* Empty file -> text */ 190 202 c = *z; 191 203 if( c==0 ) return 0; /* \000 byte in a file -> binary */ 192 - if ( n > 1 ){ 193 - if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){ 194 - return -2; 195 - } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){ 196 - return -2; 197 - } 198 - } 199 204 j = (c!='\n'); 200 205 while( --n>0 ){ 201 206 c = *++z; ++j; 202 207 if( c==0 ) return 0; /* \000 byte in a file -> binary */ 203 208 if( c=='\n' ){ 204 209 if( z[-1]=='\r' ){ 205 - result = -1; /* Contains CrLf, continue */ 210 + result = -1; /* Contains CR/NL, continue */ 206 211 } 207 212 if( j>LENGTH_MASK ){ 208 213 return 0; /* Very long line -> binary */ 209 214 } 210 215 j = 0; 211 216 } 212 217 } 213 218 if( j>LENGTH_MASK ){ 214 219 return 0; /* Very long line -> binary */ 215 220 } 216 221 return result; /* No problems seen -> not binary */ 217 222 } 223 + 224 +/* 225 +** This function returns non-zero if the blob starts with a UTF-16le or 226 +** UTF-16be byte-order-mark (BOM). 227 +*/ 228 +int starts_with_utf16_bom(const Blob *pContent){ 229 + const char *z = blob_buffer(pContent); 230 + int c1, c2; 231 + 232 + if( blob_size(pContent)<2 ) return 0; 233 + c1 = z[0]; c2 = z[1]; 234 + if( (c1==(char)0xff) && (c2==(char)0xfe) ){ 235 + return 1; 236 + }else if( (c1==(char)0xff) && (c2==(char)0xfe) ){ 237 + return 1; 238 + } 239 + return 0; 240 +} 218 241 219 242 /* 220 243 ** Return true if two DLine elements are identical. 221 244 */ 222 245 static int same_dline(DLine *pA, DLine *pB){ 223 246 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; 224 247 }