Index: src/checkin.c ================================================================== --- src/checkin.c +++ src/checkin.c @@ -887,26 +887,22 @@ ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending ** is seen in a text file. */ static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ int eType; /* return value of looks_like_text() */ - int fUnicode; /* return value of starts_with_utf16_bom() */ char *zMsg; /* Warning message */ Blob fname; /* Relative pathname of the file */ static int allOk = 0; /* Set to true to disable this routine */ if( allOk ) return; eType = looks_like_text(p); - fUnicode = starts_with_utf16_bom(p); - if( eType==-1 || fUnicode ){ - const char *zWarning; + if( eType<0 ){ + const char *zWarning ; Blob ans; char cReply; - if( eType==-1 && fUnicode ){ - zWarning = "Unicode and CR/NL line endings"; - }else if( eType==-1 ){ + if( eType==-3 ){ if( crnlOk ){ return; /* We don't want CR/NL warnings for this file. */ } zWarning = "CR/NL line endings"; }else{ Index: src/diff.c ================================================================== --- src/diff.c +++ src/diff.c @@ -48,11 +48,11 @@ "cannot compute difference between binary files\n" #define DIFF_CANNOT_COMPUTE_SYMLINK \ "cannot compute difference between symlink and regular file\n" -#define looks_like_binary(blob) (looks_like_text((blob)) == 0) +#define looks_like_binary(blob) ((looks_like_text(blob)&3) != 1) #endif /* INTERFACE */ /* ** Maximum length of a line in a text file. (8192) */ @@ -179,68 +179,93 @@ ** (1) -- The content appears to consist entirely of text, with lines ** delimited by line-feed characters; however, the encoding may ** not be UTF-8. ** ** (0) -- The content appears to be binary because it contains embedded -** NUL (\000) characters or an extremely long line. Since this -** function does not understand UTF-16, it may falsely consider -** UTF-16 text to be binary. +** NUL (\000) characters or an extremely long line. ** -** (-1) -- The content appears to consist entirely of text, with lines +** (-1) -- The content appears to consist entirely of text, in the +** UTF-16 (LE) encoding. +** +** (-2) -- The content appears to consist entirely of text, in the +** UTF-16 (BE) encoding. +** +** (-3) -- The content appears to consist entirely of text, with lines ** delimited by carriage-return, line-feed pairs; however, the ** encoding may not be UTF-8. ** */ int looks_like_text(const Blob *pContent){ - const char *z = blob_buffer(pContent); + unsigned char *z = (unsigned char *) blob_buffer(pContent); unsigned int n = blob_size(pContent); - int j, c; + int j; + unsigned char c; int result = 1; /* Assume text with no CR/NL */ /* Check individual lines. */ if( n==0 ) return result; /* Empty file -> text */ c = *z; if( c==0 ) return 0; /* \000 byte in a file -> binary */ - j = (c!='\n'); + if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ + if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ + result = -1; + j = LENGTH_MASK/3; + while( (n-=2)>0 ){ + c = *(z+=2); + if( z[1]==0 ){ /* High-byte must be 0 for further checks */ + if( c==0 ) return 0; /* \000 char in a file -> binary */ + if( c=='\n' ){ + j = LENGTH_MASK/3; + } + }else if( (z[1]==0xff)&&(c>0xfd) ){ + /* FFFE and FFFF are invalid UTF-16. */ + return 0; + } + if( --j==0 ){ + return 0; /* Very long line -> binary */ + } + } + return result; + } else if ( (c==0xfe) && (z[1]==0xff) ){ /* UTF-16 BE BOM */ + result = -2; + ++z; j = LENGTH_MASK/3; + while( (n-=2)>0 ){ + c = *(z+=2); + if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */ + if( c==0 ) return 0; /* \000 char in a file -> binary */ + if( c=='\n' ){ + j = LENGTH_MASK/3; + } + }else if( (z[-1]==0xff)&&(c>0xfd) ){ + /* FFFE and FFFF are invalid UTF-16. */ + return 0; + } + if( --j==0 ){ + return 0; /* Very long line -> binary */ + } + } + return result; + } + } + j = LENGTH_MASK - (c!='\n'); while( --n>0 ){ - c = *++z; ++j; + c = *++z; if( c==0 ) return 0; /* \000 byte in a file -> binary */ if( c=='\n' ){ if( z[-1]=='\r' ){ - result = -1; /* Contains CR/NL, continue */ + result = -3; /* Contains CR/NL, continue */ } - if( j>LENGTH_MASK ){ - return 0; /* Very long line -> binary */ - } - j = 0; + j = LENGTH_MASK; + } + if( --j==0 ){ + return 0; /* Very long line -> binary */ } } - if( j>LENGTH_MASK ){ - return 0; /* Very long line -> binary */ - } return result; /* No problems seen -> not binary */ } -/* -** This function returns non-zero if the blob starts with a UTF-16le or -** UTF-16be byte-order-mark (BOM). -*/ -int starts_with_utf16_bom(const Blob *pContent){ - const char *z = blob_buffer(pContent); - int c1, c2; - - if( blob_size(pContent)<2 ) return 0; - c1 = z[0]; c2 = z[1]; - if( (c1==(char)0xff) && (c2==(char)0xfe) ){ - return 1; - }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ - return 1; - } - return 0; -} - /* ** Return true if two DLine elements are identical. */ static int same_dline(DLine *pA, DLine *pB){ return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; Index: src/doc.c ================================================================== --- src/doc.c +++ src/doc.c @@ -26,22 +26,22 @@ ** Try to guess the mimetype from content. ** ** If the content is pure text, return NULL. ** ** For image types, attempt to return an appropriate mimetype -** name like "image/gif" or "image/jpeg". +** name like "image/gif" or "image/jpeg". ** ** For any other binary type, return "unknown/unknown". */ const char *mimetype_from_content(Blob *pBlob){ int i; int n; const unsigned char *x; - static const char isBinary[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + static const char isBinary[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, }; /* A table of mimetypes based on file content prefixes */ static const struct { @@ -58,11 +58,11 @@ x = (const unsigned char*)blob_buffer(pBlob); n = blob_size(pBlob); for(i=0; i<n; i++){ unsigned char c = x[i]; - if( c<=0x1f && isBinary[c] ){ + if( isBinary[c] ){ break; } } if( i>=n ){ return 0; /* Plain text */ @@ -83,11 +83,11 @@ int i; int first, last; int len; char zSuffix[20]; - /* A table of mimetypes based on file suffixes. + /* A table of mimetypes based on file suffixes. ** Suffixes must be in sorted order so that we can do a binary ** search to find the mime-type */ static const struct { const char *zSuffix; /* The file suffix */ @@ -474,11 +474,11 @@ } db_end_transaction(0); } /* The file is now contained in the filebody blob. Deliver the - ** file to the user + ** file to the user */ zMime = P("mimetype"); if( zMime==0 ){ zMime = mimetype_from_name(zName); } @@ -513,79 +513,79 @@ /* Jump here when unable to locate the document */ db_end_transaction(0); style_header("Document Not Found"); @ <p>No such document: %h(zName)</p> style_footer(); - return; + return; } /* ** The default logo. */ static const unsigned char aLogo[] = { - 71, 73, 70, 56, 55, 97, 62, 0, 71, 0, 244, 0, 0, 85, - 129, 149, 95, 136, 155, 99, 139, 157, 106, 144, 162, 113, 150, 166, - 116, 152, 168, 127, 160, 175, 138, 168, 182, 148, 176, 188, 159, 184, - 195, 170, 192, 202, 180, 199, 208, 184, 202, 210, 191, 207, 215, 201, - 215, 221, 212, 223, 228, 223, 231, 235, 226, 227, 226, 226, 234, 237, - 233, 239, 241, 240, 244, 246, 244, 247, 248, 255, 255, 255, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, - 0, 0, 62, 0, 71, 0, 0, 5, 255, 96, 100, 141, 100, 105, - 158, 168, 37, 41, 132, 192, 164, 112, 44, 207, 102, 99, 0, 56, - 16, 84, 116, 239, 199, 141, 65, 110, 232, 248, 25, 141, 193, 161, - 82, 113, 108, 202, 32, 55, 229, 210, 73, 61, 41, 164, 88, 102, - 181, 10, 41, 96, 179, 91, 106, 35, 240, 5, 135, 143, 137, 242, - 87, 123, 246, 33, 190, 81, 108, 163, 237, 198, 14, 30, 113, 233, - 131, 78, 115, 72, 11, 115, 87, 101, 19, 124, 51, 66, 74, 8, - 19, 16, 67, 100, 74, 133, 50, 15, 101, 135, 56, 11, 74, 6, - 143, 49, 126, 106, 56, 8, 145, 67, 9, 152, 48, 139, 155, 5, - 22, 13, 74, 115, 161, 41, 147, 101, 13, 130, 57, 132, 170, 40, - 167, 155, 0, 94, 57, 3, 178, 48, 183, 181, 57, 160, 186, 40, - 19, 141, 189, 0, 69, 192, 40, 16, 195, 155, 185, 199, 41, 201, - 189, 191, 205, 193, 188, 131, 210, 49, 175, 88, 209, 214, 38, 19, - 3, 11, 19, 111, 127, 60, 219, 39, 55, 204, 19, 11, 6, 100, - 5, 10, 227, 228, 37, 163, 0, 239, 117, 56, 238, 243, 49, 195, - 177, 247, 48, 158, 56, 251, 50, 216, 254, 197, 56, 128, 107, 158, - 2, 125, 171, 114, 92, 218, 246, 96, 66, 3, 4, 50, 134, 176, - 145, 6, 97, 64, 144, 24, 19, 136, 108, 91, 177, 160, 0, 194, - 19, 253, 0, 216, 107, 214, 224, 192, 129, 5, 16, 83, 255, 244, - 43, 213, 195, 24, 159, 27, 169, 64, 230, 88, 208, 227, 129, 182, - 54, 4, 89, 158, 24, 181, 163, 199, 1, 155, 52, 233, 8, 130, - 176, 83, 24, 128, 137, 50, 18, 32, 48, 48, 114, 11, 173, 137, - 19, 110, 4, 64, 105, 1, 194, 30, 140, 68, 15, 24, 24, 224, - 50, 76, 70, 0, 11, 171, 54, 26, 160, 181, 194, 149, 148, 40, - 174, 148, 122, 64, 180, 208, 161, 17, 207, 112, 164, 1, 128, 96, - 148, 78, 18, 21, 194, 33, 229, 51, 247, 65, 133, 97, 5, 250, - 69, 229, 100, 34, 220, 128, 166, 116, 190, 62, 8, 167, 195, 170, - 47, 163, 0, 130, 90, 152, 11, 160, 173, 170, 27, 154, 26, 91, - 232, 151, 171, 18, 14, 162, 253, 98, 170, 18, 70, 171, 64, 219, - 10, 67, 136, 134, 187, 116, 75, 180, 46, 179, 174, 135, 4, 189, - 229, 231, 78, 40, 10, 62, 226, 164, 172, 64, 240, 167, 170, 10, - 18, 124, 188, 10, 107, 65, 193, 94, 11, 93, 171, 28, 248, 17, - 239, 46, 140, 78, 97, 34, 25, 153, 36, 99, 65, 130, 7, 203, - 183, 168, 51, 34, 136, 25, 140, 10, 6, 16, 28, 255, 145, 241, - 230, 140, 10, 66, 178, 167, 112, 48, 192, 128, 129, 9, 31, 141, - 84, 138, 63, 163, 162, 2, 203, 206, 240, 56, 55, 98, 192, 188, - 15, 185, 50, 160, 6, 0, 125, 62, 33, 214, 195, 33, 5, 24, - 184, 25, 231, 14, 201, 245, 144, 23, 126, 104, 228, 0, 145, 2, - 13, 140, 244, 212, 17, 21, 20, 176, 159, 17, 95, 225, 160, 128, - 16, 1, 32, 224, 142, 32, 227, 125, 87, 64, 0, 16, 54, 129, - 205, 2, 141, 76, 53, 130, 103, 37, 166, 64, 144, 107, 78, 196, - 5, 192, 0, 54, 50, 229, 9, 141, 49, 84, 194, 35, 12, 196, - 153, 48, 192, 137, 57, 84, 24, 7, 87, 159, 249, 240, 215, 143, - 105, 241, 118, 149, 9, 139, 4, 64, 203, 141, 35, 140, 129, 131, - 16, 222, 125, 231, 128, 2, 238, 17, 152, 66, 3, 5, 56, 224, - 159, 103, 16, 76, 25, 75, 5, 11, 164, 215, 96, 9, 14, 16, - 36, 225, 15, 11, 40, 144, 192, 156, 41, 10, 178, 199, 3, 66, - 64, 80, 193, 3, 124, 90, 48, 129, 129, 102, 177, 18, 192, 154, - 49, 84, 240, 208, 92, 22, 149, 96, 39, 9, 31, 74, 17, 94, - 3, 8, 177, 199, 72, 59, 85, 76, 25, 216, 8, 139, 194, 197, - 138, 163, 69, 96, 115, 0, 147, 72, 72, 84, 28, 14, 79, 86, - 233, 230, 23, 113, 26, 160, 128, 3, 10, 58, 129, 103, 14, 159, - 214, 163, 146, 117, 238, 213, 154, 128, 151, 109, 84, 64, 217, 13, - 27, 10, 228, 39, 2, 235, 164, 168, 74, 8, 0, 59, + 71, 73, 70, 56, 55, 97, 62, 0, 71, 0, 244, 0, 0, 85, + 129, 149, 95, 136, 155, 99, 139, 157, 106, 144, 162, 113, 150, 166, + 116, 152, 168, 127, 160, 175, 138, 168, 182, 148, 176, 188, 159, 184, + 195, 170, 192, 202, 180, 199, 208, 184, 202, 210, 191, 207, 215, 201, + 215, 221, 212, 223, 228, 223, 231, 235, 226, 227, 226, 226, 234, 237, + 233, 239, 241, 240, 244, 246, 244, 247, 248, 255, 255, 255, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, + 0, 0, 62, 0, 71, 0, 0, 5, 255, 96, 100, 141, 100, 105, + 158, 168, 37, 41, 132, 192, 164, 112, 44, 207, 102, 99, 0, 56, + 16, 84, 116, 239, 199, 141, 65, 110, 232, 248, 25, 141, 193, 161, + 82, 113, 108, 202, 32, 55, 229, 210, 73, 61, 41, 164, 88, 102, + 181, 10, 41, 96, 179, 91, 106, 35, 240, 5, 135, 143, 137, 242, + 87, 123, 246, 33, 190, 81, 108, 163, 237, 198, 14, 30, 113, 233, + 131, 78, 115, 72, 11, 115, 87, 101, 19, 124, 51, 66, 74, 8, + 19, 16, 67, 100, 74, 133, 50, 15, 101, 135, 56, 11, 74, 6, + 143, 49, 126, 106, 56, 8, 145, 67, 9, 152, 48, 139, 155, 5, + 22, 13, 74, 115, 161, 41, 147, 101, 13, 130, 57, 132, 170, 40, + 167, 155, 0, 94, 57, 3, 178, 48, 183, 181, 57, 160, 186, 40, + 19, 141, 189, 0, 69, 192, 40, 16, 195, 155, 185, 199, 41, 201, + 189, 191, 205, 193, 188, 131, 210, 49, 175, 88, 209, 214, 38, 19, + 3, 11, 19, 111, 127, 60, 219, 39, 55, 204, 19, 11, 6, 100, + 5, 10, 227, 228, 37, 163, 0, 239, 117, 56, 238, 243, 49, 195, + 177, 247, 48, 158, 56, 251, 50, 216, 254, 197, 56, 128, 107, 158, + 2, 125, 171, 114, 92, 218, 246, 96, 66, 3, 4, 50, 134, 176, + 145, 6, 97, 64, 144, 24, 19, 136, 108, 91, 177, 160, 0, 194, + 19, 253, 0, 216, 107, 214, 224, 192, 129, 5, 16, 83, 255, 244, + 43, 213, 195, 24, 159, 27, 169, 64, 230, 88, 208, 227, 129, 182, + 54, 4, 89, 158, 24, 181, 163, 199, 1, 155, 52, 233, 8, 130, + 176, 83, 24, 128, 137, 50, 18, 32, 48, 48, 114, 11, 173, 137, + 19, 110, 4, 64, 105, 1, 194, 30, 140, 68, 15, 24, 24, 224, + 50, 76, 70, 0, 11, 171, 54, 26, 160, 181, 194, 149, 148, 40, + 174, 148, 122, 64, 180, 208, 161, 17, 207, 112, 164, 1, 128, 96, + 148, 78, 18, 21, 194, 33, 229, 51, 247, 65, 133, 97, 5, 250, + 69, 229, 100, 34, 220, 128, 166, 116, 190, 62, 8, 167, 195, 170, + 47, 163, 0, 130, 90, 152, 11, 160, 173, 170, 27, 154, 26, 91, + 232, 151, 171, 18, 14, 162, 253, 98, 170, 18, 70, 171, 64, 219, + 10, 67, 136, 134, 187, 116, 75, 180, 46, 179, 174, 135, 4, 189, + 229, 231, 78, 40, 10, 62, 226, 164, 172, 64, 240, 167, 170, 10, + 18, 124, 188, 10, 107, 65, 193, 94, 11, 93, 171, 28, 248, 17, + 239, 46, 140, 78, 97, 34, 25, 153, 36, 99, 65, 130, 7, 203, + 183, 168, 51, 34, 136, 25, 140, 10, 6, 16, 28, 255, 145, 241, + 230, 140, 10, 66, 178, 167, 112, 48, 192, 128, 129, 9, 31, 141, + 84, 138, 63, 163, 162, 2, 203, 206, 240, 56, 55, 98, 192, 188, + 15, 185, 50, 160, 6, 0, 125, 62, 33, 214, 195, 33, 5, 24, + 184, 25, 231, 14, 201, 245, 144, 23, 126, 104, 228, 0, 145, 2, + 13, 140, 244, 212, 17, 21, 20, 176, 159, 17, 95, 225, 160, 128, + 16, 1, 32, 224, 142, 32, 227, 125, 87, 64, 0, 16, 54, 129, + 205, 2, 141, 76, 53, 130, 103, 37, 166, 64, 144, 107, 78, 196, + 5, 192, 0, 54, 50, 229, 9, 141, 49, 84, 194, 35, 12, 196, + 153, 48, 192, 137, 57, 84, 24, 7, 87, 159, 249, 240, 215, 143, + 105, 241, 118, 149, 9, 139, 4, 64, 203, 141, 35, 140, 129, 131, + 16, 222, 125, 231, 128, 2, 238, 17, 152, 66, 3, 5, 56, 224, + 159, 103, 16, 76, 25, 75, 5, 11, 164, 215, 96, 9, 14, 16, + 36, 225, 15, 11, 40, 144, 192, 156, 41, 10, 178, 199, 3, 66, + 64, 80, 193, 3, 124, 90, 48, 129, 129, 102, 177, 18, 192, 154, + 49, 84, 240, 208, 92, 22, 149, 96, 39, 9, 31, 74, 17, 94, + 3, 8, 177, 199, 72, 59, 85, 76, 25, 216, 8, 139, 194, 197, + 138, 163, 69, 96, 115, 0, 147, 72, 72, 84, 28, 14, 79, 86, + 233, 230, 23, 113, 26, 160, 128, 3, 10, 58, 129, 103, 14, 159, + 214, 163, 146, 117, 238, 213, 154, 128, 151, 109, 84, 64, 217, 13, + 27, 10, 228, 39, 2, 235, 164, 168, 74, 8, 0, 59, }; /* ** WEBPAGE: logo **