Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch disallow-invalid-utf8-in-filenames Excluding Merge-Ins
This is equivalent to a diff from 1d462a683f to 7dabede3b3
2013-01-23
| ||
13:15 | Further fine-tuning of the check for valid UTF8 characters in filenames. check-in: 4d456c9fd1 user: drh tags: trunk | |
10:38 | put settings in right alphabetical order check-in: 4ddd099b57 user: jan.nijtmans tags: trunk | |
2013-01-21
| ||
13:12 | Oops, make it work correctly now. Closed-Leaf check-in: 7dabede3b3 user: jan.nijtmans tags: disallow-invalid-utf8-in-filenames | |
10:29 | merge trunk check-in: a68dffbff3 user: jan.nijtmans tags: improve_commit_warning | |
09:39 | From the changes.wiki for Fossil 1.25: "Disallow invalid UTF8 characters (such as characters in the surrogate pair range) in filenames." This completes the set of UTF8 characters which are generally considered invalid, so they should be disallowed in filenames: the "overlong form", invalid continuation bytes, and -finally- noncharacters. check-in: 011d5f692d user: jan.nijtmans tags: disallow-invalid-utf8-in-filenames | |
2013-01-20
| ||
10:57 | Fix the SQL for the command-line timeline so that it works for timeline items that are not associated with a particular branch. check-in: 1d462a683f user: drh tags: trunk | |
2013-01-18
| ||
21:34 | Run "analyze" after a rebuild. For small repositories, the time doesn't matter and for large repositories, the effect on the query plans is huge. Push/pull, for example, will otherwise do a sequential scan of the blob table and join that with the unclustered table afterwards, when the other way around is several orders of magnitude more efficient... check-in: 80bf94e0f7 user: joerg tags: trunk | |
Changes to src/file.c.
488 488 ** * Does not contain any of these characters in the path: "\" 489 489 ** * Does not end with "/". 490 490 ** * Does not contain two or more "/" characters in a row. 491 491 ** * Contains at least one character 492 492 ** 493 493 ** Invalid UTF8 characters result in a false return if bStrictUtf8 is 494 494 ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently 495 -** ignored. 495 +** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences 496 +** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) 496 497 */ 497 498 int file_is_simple_pathname(const char *z, int bStrictUtf8){ 498 499 int i; 499 - char c = z[0]; 500 + unsigned char c = (unsigned char) z[0]; 500 501 char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; 501 502 if( c=='/' || c==0 ) return 0; 502 503 if( c=='.' ){ 503 504 if( z[1]=='/' || z[1]==0 ) return 0; 504 505 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; 505 506 } 506 - for(i=0; (c=z[i])!=0; i++){ 507 + for(i=0; (c=(unsigned char)z[i])!=0; i++){ 507 508 if( c & maskNonAscii ){ 508 - if( (c & 0xf0) == 0xf0 ) { 509 - /* Unicode characters > U+FFFF are not supported. 510 - * Windows XP and earlier cannot handle them. 511 - */ 509 + if( c<0xc2 ){ 510 + /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ 512 511 return 0; 513 - } 514 - if( (c & 0xf0) == 0xe0 ) { 512 + }else if( (c&0xe0)==0xe0 ){ 513 + /* 3-byte or more */ 514 + int unicode; 515 + if( c&0x10 ){ 516 + /* Unicode characters > U+FFFF are not supported. 517 + * Windows XP and earlier cannot handle them. 518 + */ 519 + return 0; 520 + } 515 521 /* This is a 3-byte UTF-8 character */ 516 - if ( (c & 0xfe) == 0xee ){ 517 - /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ 518 - if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ 519 - /* Unicode character in the range U+E000 - U+F8FF are for 520 - * private use, they shouldn't occur in filenames. 
*/ 522 + unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); 523 + if( unicode <= 0x07ff ){ 524 + /* overlong form */ 525 + return 0; 526 + }else if( unicode>=0xe000 ){ 527 + /* U+E000..U+FFFF */ 528 + if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ 529 + /* U+E000..U+F8FF are for private use. 530 + * U+FFFE..U+FFFF are noncharacters. */ 531 + return 0; 532 + } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ 533 + /* U+FDD0..U+FDEF are noncharacters. */ 521 534 return 0; 522 535 } 523 - }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ 524 - /* Unicode character in the range U+D800 - U+DFFF are for 525 - * surrogate pairs, they shouldn't occur in filenames. */ 536 + }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ 537 + /* U+D800..U+DFFF are for surrogate pairs. */ 526 538 return 0; 527 539 } 528 540 } 541 + do{ 542 + if( (z[i+1]&0xc0)!=0x80 ){ 543 + /* Invalid continuation byte (multi-byte UTF-8) */ 544 + return 0; 545 + } 546 + /* The hi-bits of c are used to keep track of the number of expected 547 + * continuation-bytes, so we don't need a separate counter. */ 548 + c<<=1; ++i; 549 + }while( c>=0xc0 ); 529 550 }else if( c=='\\' ){ 530 551 return 0; 531 552 } 532 553 if( c=='/' ){ 533 554 if( z[i+1]=='/' ) return 0; 534 555 if( z[i+1]=='.' ){ 535 556 if( z[i+2]=='/' || z[i+2]==0 ) return 0; ................................................................................ 576 597 #if defined(_WIN32) 577 598 for(i=0; i<n; i++){ 578 599 if( z[i]=='\\' ) z[i] = '/'; 579 600 } 580 601 #endif 581 602 582 603 /* Removing trailing "/" characters */ 583 - if ( !slash ){ 604 + if( !slash ){ 584 605 while( n>1 && z[n-1]=='/' ){ n--; } 585 606 } 586 607 587 608 /* Remove duplicate '/' characters. Except, two // at the beginning 588 609 ** of a pathname is allowed since this is important on windows. */ 589 610 for(i=j=1; i<n; i++){ 590 611 z[j++] = z[i]; ................................................................................ 
833 854 if( zPath[i]==0 ){ 834 855 blob_reset(pOut); 835 856 if( zPwd[i]==0 ){ 836 857 blob_append(pOut, ".", 1); 837 858 }else{ 838 859 blob_append(pOut, "..", 2); 839 860 for(j=i+1; zPwd[j]; j++){ 840 - if( zPwd[j]=='/' ) { 861 + if( zPwd[j]=='/' ){ 841 862 blob_append(pOut, "/..", 3); 842 863 } 843 864 } 844 865 } 845 866 return; 846 867 } 847 868 if( zPwd[i]==0 && zPath[i]=='/' ){ ................................................................................ 850 871 blob_append(pOut, &zPath[i+1], -1); 851 872 blob_reset(&tmp); 852 873 return; 853 874 } 854 875 while( zPath[i-1]!='/' ){ i--; } 855 876 blob_set(&tmp, "../"); 856 877 for(j=i; zPwd[j]; j++){ 857 - if( zPwd[j]=='/' ) { 878 + if( zPwd[j]=='/' ){ 858 879 blob_append(&tmp, "../", 3); 859 880 } 860 881 } 861 882 blob_append(&tmp, &zPath[i], -1); 862 883 blob_reset(pOut); 863 884 memcpy(pOut, &tmp, sizeof(tmp)); 864 885 }