Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch disallow-invalid-utf8-in-filenames Excluding Merge-Ins
This is equivalent to a diff from 1d462a683f to 7dabede3b3
2013-01-23
| ||
13:15 | Further fine-tuning of the check for valid UTF8 characters in filenames. check-in: 4d456c9fd1 user: drh tags: trunk | |
10:38 | Put settings in the right alphabetical order. check-in: 4ddd099b57 user: jan.nijtmans tags: trunk | |
2013-01-21
| ||
13:12 | Oops, make it work correctly now. Closed-Leaf check-in: 7dabede3b3 user: jan.nijtmans tags: disallow-invalid-utf8-in-filenames | |
10:29 | merge trunk check-in: a68dffbff3 user: jan.nijtmans tags: improve_commit_warning | |
09:39 | From the changes.wiki for Fossil 1.25: "Disallow invalid UTF8 characters (such as characters in the surrogate pair range) in filenames." This completes the set of UTF8 characters which are generally considered invalid, so they should be disallowed in filenames: the "overlong form", invalid continuation bytes, and -finally- noncharacters. check-in: 011d5f692d user: jan.nijtmans tags: disallow-invalid-utf8-in-filenames | |
2013-01-20
| ||
10:57 | Fix the SQL for the command-line timeline so that it works for timeline items that are not associated with a particular branch. check-in: 1d462a683f user: drh tags: trunk | |
2013-01-18
| ||
21:34 | Run "analyze" after a rebuild. For small repositories, the time doesn't matter, and for large repositories, the effect on the query plans is huge. Push/pull, for example, will otherwise do a sequential scan of the blob table and join that with the unclustered table afterwards, when the other way around is several orders of magnitude more efficient... check-in: 80bf94e0f7 user: joerg tags: trunk | |
Changes to src/file.c.
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 ... 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 ... 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 ... 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 |
** * Does not contain any of these characters in the path: "\" ** * Does not end with "/". ** * Does not contain two or more "/" characters in a row. ** * Contains at least one character ** ** Invalid UTF8 characters result in a false return if bStrictUtf8 is ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently ** ignored. */ int file_is_simple_pathname(const char *z, int bStrictUtf8){ int i; char c = z[0]; char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; if( c=='/' || c==0 ) return 0; if( c=='.' ){ if( z[1]=='/' || z[1]==0 ) return 0; if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; } for(i=0; (c=z[i])!=0; i++){ if( c & maskNonAscii ){ if( (c & 0xf0) == 0xf0 ) { /* Unicode characters > U+FFFF are not supported. * Windows XP and earlier cannot handle them. */ return 0; } if( (c & 0xf0) == 0xe0 ) { /* This is a 3-byte UTF-8 character */ if ( (c & 0xfe) == 0xee ){ /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ /* Unicode character in the range U+E000 - U+F8FF are for * private use, they shouldn't occur in filenames. */ return 0; } }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ /* Unicode character in the range U+D800 - U+DFFF are for * surrogate pairs, they shouldn't occur in filenames. */ return 0; } } }else if( c=='\\' ){ return 0; } if( c=='/' ){ if( z[i+1]=='/' ) return 0; if( z[i+1]=='.' ){ if( z[i+2]=='/' || z[i+2]==0 ) return 0; ................................................................................ #if defined(_WIN32) for(i=0; i<n; i++){ if( z[i]=='\\' ) z[i] = '/'; } #endif /* Removing trailing "/" characters */ if ( !slash ){ while( n>1 && z[n-1]=='/' ){ n--; } } /* Remove duplicate '/' characters. Except, two // at the beginning ** of a pathname is allowed since this is important on windows. */ for(i=j=1; i<n; i++){ z[j++] = z[i]; ................................................................................ 
if( zPath[i]==0 ){ blob_reset(pOut); if( zPwd[i]==0 ){ blob_append(pOut, ".", 1); }else{ blob_append(pOut, "..", 2); for(j=i+1; zPwd[j]; j++){ if( zPwd[j]=='/' ) { blob_append(pOut, "/..", 3); } } } return; } if( zPwd[i]==0 && zPath[i]=='/' ){ ................................................................................ blob_append(pOut, &zPath[i+1], -1); blob_reset(&tmp); return; } while( zPath[i-1]!='/' ){ i--; } blob_set(&tmp, "../"); for(j=i; zPwd[j]; j++){ if( zPwd[j]=='/' ) { blob_append(&tmp, "../", 3); } } blob_append(&tmp, &zPath[i], -1); blob_reset(pOut); memcpy(pOut, &tmp, sizeof(tmp)); } |
| > | | | > > > > > > | | | | | < < < < < < > > > | > > > > > > > > > < < < > > > > > > > > > > > | | | |
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 ... 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 ... 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 ... 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 |
** * Does not contain any of these characters in the path: "\" ** * Does not end with "/". ** * Does not contain two or more "/" characters in a row. ** * Contains at least one character ** ** Invalid UTF8 characters result in a false return if bStrictUtf8 is ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently ** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences ** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) */ int file_is_simple_pathname(const char *z, int bStrictUtf8){ int i; unsigned char c = (unsigned char) z[0]; char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; if( c=='/' || c==0 ) return 0; if( c=='.' ){ if( z[1]=='/' || z[1]==0 ) return 0; if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; } for(i=0; (c=(unsigned char)z[i])!=0; i++){ if( c & maskNonAscii ){ if( c<0xc2 ){ /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ return 0; }else if( (c&0xe0)==0xe0 ){ /* 3-byte or more */ int unicode; if( c&0x10 ){ /* Unicode characters > U+FFFF are not supported. * Windows XP and earlier cannot handle them. */ return 0; } /* This is a 3-byte UTF-8 character */ unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); if( unicode <= 0x07ff ){ /* overlong form */ return 0; }else if( unicode>=0xe000 ){ /* U+E000..U+FFFF */ if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ /* U+E000..U+F8FF are for private use. * U+FFFE..U+FFFF are noncharacters. */ return 0; } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ /* U+FDD0..U+FDEF are noncharacters. */ return 0; } }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ /* U+D800..U+DFFF are for surrogate pairs. */ return 0; } } do{ if( (z[i+1]&0xc0)!=0x80 ){ /* Invalid continuation byte (multi-byte UTF-8) */ return 0; } /* The hi-bits of c are used to keep track of the number of expected * continuation-bytes, so we don't need a separate counter. 
*/ c<<=1; ++i; }while( c>=0xc0 ); }else if( c=='\\' ){ return 0; } if( c=='/' ){ if( z[i+1]=='/' ) return 0; if( z[i+1]=='.' ){ if( z[i+2]=='/' || z[i+2]==0 ) return 0; ................................................................................ #if defined(_WIN32) for(i=0; i<n; i++){ if( z[i]=='\\' ) z[i] = '/'; } #endif /* Removing trailing "/" characters */ if( !slash ){ while( n>1 && z[n-1]=='/' ){ n--; } } /* Remove duplicate '/' characters. Except, two // at the beginning ** of a pathname is allowed since this is important on windows. */ for(i=j=1; i<n; i++){ z[j++] = z[i]; ................................................................................ if( zPath[i]==0 ){ blob_reset(pOut); if( zPwd[i]==0 ){ blob_append(pOut, ".", 1); }else{ blob_append(pOut, "..", 2); for(j=i+1; zPwd[j]; j++){ if( zPwd[j]=='/' ){ blob_append(pOut, "/..", 3); } } } return; } if( zPwd[i]==0 && zPath[i]=='/' ){ ................................................................................ blob_append(pOut, &zPath[i+1], -1); blob_reset(&tmp); return; } while( zPath[i-1]!='/' ){ i--; } blob_set(&tmp, "../"); for(j=i; zPwd[j]; j++){ if( zPwd[j]=='/' ){ blob_append(&tmp, "../", 3); } } blob_append(&tmp, &zPath[i], -1); blob_reset(pOut); memcpy(pOut, &tmp, sizeof(tmp)); } |