Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Changes In Branch simplify-starts-with Excluding Merge-Ins
This is equivalent to a diff from 43c4522623 to c209105f0f
2013-02-07
| ||
15:28 | Separate the blob length check (even number of bytes) and the UTF-32 check in the 3 versions of the UTF-16 BOM functions. check-in: be6756e26b user: jan.nijtmans tags: trunk | |
09:39 | merge trunk check-in: 8994f3680a user: jan.nijtmans tags: improve_commit_warning | |
09:19 | If a file starts with a UTF-32 BOM, always consider it binary without warning. Closed-Leaf check-in: c209105f0f user: jan.nijtmans tags: simplify-starts-with | |
08:47 | Combine the 4 "starts_with_utf??_bom" functions into a single, easier-to-use function "starts_with_bom". In addition, it only checks for a UTF-16 BOM if the blob has an even number of bytes. check-in: 6c417d8bf5 user: jan.nijtmans tags: simplify-starts-with | |
02:08 | Add the test-ssh-far-side command that can be used in place of a shell for the remote side of an ssh: sync. check-in: 43c4522623 user: drh tags: trunk | |
00:24 | Add the shell= query parameter to the ssh: scheme for cloning and syncing. check-in: 2163cd9666 user: drh tags: trunk | |
Changes to src/blob.c.
1093 1093 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there 1094 1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is 1095 1095 ** done. If useMbcs is false and there is no BOM, the input string is assumed 1096 1096 ** to be UTF-8 already, so no conversion is done. 1097 1097 */ 1098 1098 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){ 1099 1099 char *zUtf8; 1100 - int bomSize = 0; 1101 - if( starts_with_utf8_bom(pBlob, &bomSize) ){ 1100 + int bomSize = starts_with_bom(pBlob); 1101 + if( bomSize == 3 ){ 1102 1102 struct Blob temp; 1103 1103 zUtf8 = blob_str(pBlob) + bomSize; 1104 1104 blob_zero(&temp); 1105 1105 blob_append(&temp, zUtf8, -1); 1106 1106 blob_swap(pBlob, &temp); 1107 1107 blob_reset(&temp); 1108 1108 #ifdef _WIN32 1109 - }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){ 1110 - /* Make sure the blob contains two terminating 0-bytes */ 1111 - blob_append(pBlob, "", 1); 1112 - zUtf8 = blob_str(pBlob) + bomSize; 1113 - zUtf8 = fossil_unicode_to_utf8(zUtf8); 1114 - blob_zero(pBlob); 1115 - blob_append(pBlob, zUtf8, -1); 1116 - fossil_unicode_free(zUtf8); 1117 - }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){ 1118 - unsigned int i = blob_size(pBlob); 1109 + }else if( bomSize == 2 ){ 1119 1110 zUtf8 = blob_buffer(pBlob); 1120 - while( i > 0 ){ 1121 - /* swap bytes of unicode representation */ 1122 - char zTemp = zUtf8[--i]; 1123 - zUtf8[i] = zUtf8[i-1]; 1124 - zUtf8[--i] = zTemp; 1111 + if (*((unsigned short *)zUtf8) == 0xfffe) { 1112 + /* Found BOM, but with reversed bytes */ 1113 + unsigned int i = blob_size(pBlob); 1114 + while( i > 0 ){ 1115 + /* swap bytes of unicode representation */ 1116 + char zTemp = zUtf8[--i]; 1117 + zUtf8[i] = zUtf8[i-1]; 1118 + zUtf8[--i] = zTemp; 1119 + } 1125 1120 } 1126 1121 /* Make sure the blob contains two terminating 0-bytes */ 1127 1122 blob_append(pBlob, "", 1); 1128 1123 zUtf8 = blob_str(pBlob) + bomSize; 1129 1124 zUtf8 = 
fossil_unicode_to_utf8(zUtf8); 1130 1125 blob_zero(pBlob); 1131 1126 blob_append(pBlob, zUtf8, -1);
Changes to src/checkin.c.
897 897 Blob *p, /* The content of the file being committed. */ 898 898 int crnlOk, /* Non-zero if CR/NL warnings should be disabled. */ 899 899 int binOk, /* Non-zero if binary warnings should be disabled. */ 900 900 int encodingOk, /* Non-zero if encoding warnings should be disabled. */ 901 901 const char *zFilename /* The full name of the file being committed. */ 902 902 ){ 903 903 int eType; /* return value of looks_like_utf8/utf16() */ 904 - int fUnicode; /* return value of starts_with_utf16_bom() */ 904 + int fUnicode; /* 1 if blob starts with UTF-16 BOM */ 905 905 char *zMsg; /* Warning message */ 906 906 Blob fname; /* Relative pathname of the file */ 907 907 static int allOk = 0; /* Set to true to disable this routine */ 908 908 909 909 if( allOk ) return 0; 910 - fUnicode = starts_with_utf16_bom(p, 0); 910 + fUnicode = (starts_with_bom(p) == 2); 911 911 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); 912 912 if( eType==0 || eType==-1 || fUnicode ){ 913 913 const char *zWarning; 914 914 const char *zDisable; 915 915 const char *zConvert = "c=convert/"; 916 916 Blob ans; 917 917 char cReply;
Changes to src/diff.c.
338 338 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00 339 339 }; 340 340 if( pnByte ) *pnByte = 3; 341 341 return bom; 342 342 } 343 343 344 344 /* 345 -** This function returns non-zero if the blob starts with a UTF-8 346 -** byte-order-mark (BOM). 345 +** This function returns detected BOM size if the blob starts with 346 +** a UTF-8, UTF-16le or UTF-16be byte-order-mark (BOM). 347 347 */ 348 -int starts_with_utf8_bom(const Blob *pContent, int *pnByte){ 348 +int starts_with_bom(const Blob *pContent){ 349 349 const char *z = blob_buffer(pContent); 350 - int bomSize = 0; 350 + int c1, bomSize = 0; 351 351 const unsigned char *bom = get_utf8_bom(&bomSize); 352 352 353 - if( pnByte ) *pnByte = bomSize; 354 - if( blob_size(pContent)<bomSize ) return 0; 355 - return memcmp(z, bom, bomSize)==0; 356 -} 357 - 358 -/* 359 -** This function returns non-zero if the blob starts with a UTF-16le or 360 -** UTF-16be byte-order-mark (BOM). 361 -*/ 362 -int starts_with_utf16_bom(const Blob *pContent, int *pnByte){ 363 - const char *z = blob_buffer(pContent); 364 - int c1, c2; 365 - 366 - if( pnByte ) *pnByte = 2; 367 - if( blob_size(pContent)<2 ) return 0; 368 - c1 = z[0]; c2 = z[1]; 369 - if( (c1==(char)0xff) && (c2==(char)0xfe) ){ 370 - return 1; 371 - }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ 372 - return 1; 373 - } 374 - return 0; 375 -} 376 - 377 -/* 378 -** This function returns non-zero if the blob starts with a UTF-16le 379 -** byte-order-mark (BOM). 380 -*/ 381 -int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ 382 - const char *z = blob_buffer(pContent); 383 - int c1, c2; 384 - 385 - if( pnByte ) *pnByte = 2; 386 - if( blob_size(pContent)<2 ) return 0; 387 - c1 = z[0]; c2 = z[1]; 388 - if( (c1==(char)0xff) && (c2==(char)0xfe) ){ 389 - return 1; 390 - } 391 - return 0; 392 -} 393 - 394 -/* 395 -** This function returns non-zero if the blob starts with a UTF-16be 396 -** byte-order-mark (BOM). 
397 -*/ 398 -int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ 399 - const char *z = blob_buffer(pContent); 400 - int c1, c2; 401 - 402 - if( pnByte ) *pnByte = 2; 403 - if( blob_size(pContent)<2 ) return 0; 404 - c1 = z[0]; c2 = z[1]; 405 - if( (c1==(char)0xfe) && (c2==(char)0xff) ){ 406 - return 1; 353 + if( (blob_size(pContent)>=bomSize) 354 + && (memcmp(z, bom, bomSize)==0) ){ 355 + return bomSize; 356 + } 357 + /* Only accept UTF-16 BOM if the blob has an even number of bytes */ 358 + if( (blob_size(pContent)<2) || (blob_size(pContent)&1) ) return 0; 359 + c1 = *((unsigned short *)z); 360 + if( (c1==0xfffe) || (c1==0xfeff) ){ 361 + if( blob_size(pContent)>=4 ){ 362 + /* For UTF-32 BOM, always return 0. */ 363 + if( ((unsigned short *)z)[1] == 0 ) return 0; 364 + } 365 + return 2; 407 366 } 408 367 return 0; 409 368 } 410 369 411 370 /* 412 371 ** Return true if two DLine elements are identical. 413 372 */ ................................................................................ 
2367 2326 2368 2327 zLimit = find_option("limit",0,1); 2369 2328 if( zLimit==0 || zLimit[0]==0 ) zLimit = "-1"; 2370 2329 iLimit = atoi(zLimit); 2371 2330 showLog = find_option("log",0,0)!=0; 2372 2331 fileVers = find_option("filevers",0,0)!=0; 2373 2332 db_must_be_within_tree(); 2374 - if (g.argc<3) { 2333 + if( g.argc<3 ){ 2375 2334 usage("FILENAME"); 2376 2335 } 2377 2336 file_tree_name(g.argv[2], &treename, 1); 2378 2337 zFilename = blob_str(&treename); 2379 2338 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename); 2380 2339 if( fnid==0 ){ 2381 2340 fossil_fatal("no such file: %s", zFilename); 2382 2341 } 2383 2342 fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename); 2384 2343 if( fid==0 ){ 2385 2344 fossil_fatal("not part of current checkout: %s", zFilename); 2386 2345 } 2387 2346 cid = db_lget_int("checkout", 0); 2388 - if (cid == 0){ 2347 + if( cid == 0 ){ 2389 2348 fossil_fatal("Not in a checkout"); 2390 2349 } 2391 2350 if( iLimit<=0 ) iLimit = 1000000000; 2392 2351 compute_direct_ancestors(cid, iLimit); 2393 2352 mid = db_int(0, "SELECT mlink.mid FROM mlink, ancestor " 2394 2353 " WHERE mlink.fid=%d AND mlink.fnid=%d AND mlink.mid=ancestor.rid" 2395 2354 " ORDER BY ancestor.generation ASC LIMIT 1",