Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Changes In Branch ge-tarfix Excluding Merge-Ins
This is equivalent to a diff from ba15af450d to 02ce8b4a46
2011-07-25
11:21 | Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames. (check-in: a26940c22e, user: drh, tags: trunk)
2011-07-24
19:47 | Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs. (Closed-Leaf check-in: 02ce8b4a46, user: drh, tags: ge-tarfix)
00:36 | Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8. (check-in: 2ef37b3b2a, user: ge, tags: ge-tarfix)
2011-07-23
22:13 | Fix a harmless compiler warning. (check-in: ba15af450d, user: drh, tags: trunk)
22:05 | Improvements to the automatic background color chooser. Provide a setting which alters the algorithm to work with a light-color foreground. (check-in: ce4a44d931, user: drh, tags: trunk)
Changes to src/tar.c.
25 25 /* 26 26 ** State information for the tarball builder. 27 27 */ 28 28 static struct tarball_t { 29 29 unsigned char *aHdr; /* Space for building headers */ 30 30 char *zSpaces; /* Spaces for padding */ 31 31 char *zPrevDir; /* Name of directory for previous entry */ 32 + int nPrevDirAlloc; /* size of zPrevDir */ 33 + Blob pax; /* PAX data */ 32 34 } tball; 35 + 36 + 37 +/* 38 +** field lengths of 'ustar' name and prefix fields. 39 +*/ 40 +#define USTAR_NAME_LEN 100 41 +#define USTAR_PREFIX_LEN 155 42 + 33 43 34 44 /* 35 45 ** Begin the process of generating a tarball. 36 46 ** 37 47 ** Initialize the GZIP compressor and the table of directory names. 38 48 */ 39 49 static void tar_begin(void){ 40 50 assert( tball.aHdr==0 ); 41 - tball.aHdr = fossil_malloc(512+512+256); 42 - memset(tball.aHdr, 0, 512+512+256); 51 + tball.aHdr = fossil_malloc(512+512); 52 + memset(tball.aHdr, 0, 512+512); 43 53 tball.zSpaces = (char*)&tball.aHdr[512]; 44 - tball.zPrevDir = (char*)&tball.zSpaces[512]; 54 + /* zPrevDir init */ 55 + tball.zPrevDir = NULL; 56 + tball.nPrevDirAlloc = 0; 57 + /* scratch buffer init */ 58 + blob_zero(&tball.pax); 59 + 45 60 memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ 46 61 memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ 47 - memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ 62 + memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ 63 + memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ 64 + memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ 48 65 gzip_begin(); 49 66 db_multi_exec( 50 67 "CREATE TEMP TABLE dir(name UNIQUE);" 51 68 ); 52 69 } 70 + 71 + 72 +/* 73 +** verify that lla characters in 'zName' are in the 74 +** ISO646 (=ASCII) character set. 
75 +*/ 76 +static int is_iso646_name( 77 + const char *zName, /* file path */ 78 + int nName /* path length */ 79 +){ 80 + int i; 81 + for(i = 0; i < nName; i++){ 82 + unsigned char c = (unsigned char)zName[i]; 83 + if( c>0x7e ) return 0; 84 + } 85 + return 1; 86 +} 87 + 88 + 89 +/* 90 +** copy string pSrc into pDst, truncating or padding with 0 if necessary 91 +*/ 92 +static void padded_copy( 93 + char *pDest, 94 + int nDest, 95 + const char *pSrc, 96 + int nSrc 97 +){ 98 + if(nSrc >= nDest){ 99 + memcpy(pDest, pSrc, nDest); 100 + }else{ 101 + memcpy(pDest, pSrc, nSrc); 102 + memset(&pDest[nSrc], 0, nDest - nSrc); 103 + } 104 +} 105 + 106 + 107 + 108 +/****************************************************************************** 109 +** 110 +** The 'tar' format has evolved over time. Initially the name was stored 111 +** in a 100 byte null-terminated field 'name'. File path names were 112 +** limited to 99 bytes. 113 +** 114 +** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing 115 +** for up to 255 characters to be stored. The full file path is formed by 116 +** concatenating the field 'prefix', a slash, and the field 'name'. This 117 +** gives some measure of compatibility with programs that only understand 118 +** the oldest format. 119 +** 120 +** The latest Posix extension is called the 'pax Interchange Format'. 121 +** It removes all the limitations of the previous two formats by allowing 122 +** the storage of arbitrary-length attributes in a separate object that looks 123 +** like a file to programs that do not understand this extension. So the 124 +** contents of the 'name' and 'prefix' fields should contain values that allow 125 +** versions of tar that do not understand this extension to still do 126 +** something useful. 
127 +** 128 +******************************************************************************/ 129 + 130 +/* 131 +** The position we use to split a file path into the 'name' and 'prefix' 132 +** fields needs to meet the following criteria: 133 +** 134 +** - not at the beginning or end of the string 135 +** - the position must contain a slash 136 +** - no more than 100 characters follow the slash 137 +** - no more than 155 characters precede it 138 +** 139 +** The routine 'find_split_pos' finds a split position. It will meet the 140 +** criteria of listed above if such a position exists. If no such 141 +** position exists it generates one that useful for generating the 142 +** values used for backward compatibility. 143 +*/ 144 +static int find_split_pos( 145 + const char *zName, /* file path */ 146 + int nName /* path length */ 147 +){ 148 + int i, split = 0; 149 + /* only search if the string needs splitting */ 150 + if(nName > USTAR_NAME_LEN){ 151 + for(i = 1; i+1 < nName; i++) 152 + if(zName[i] == '/'){ 153 + split = i+1; 154 + /* if the split position is within USTAR_NAME_LEN bytes from 155 + * the end we can quit */ 156 + if(nName - split <= USTAR_NAME_LEN) break; 157 + } 158 + } 159 + return split; 160 +} 161 + 162 + 163 +/* 164 +** attempt to split the file name path to meet 'ustar' header 165 +** criteria. 166 +*/ 167 +static int tar_split_path( 168 + const char *zName, /* path */ 169 + int nName, /* path length */ 170 + char *pName, /* name field */ 171 + char *pPrefix /* prefix field */ 172 +){ 173 + int split = find_split_pos(zName, nName); 174 + /* check whether both pieces fit */ 175 + if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){ 176 + return 0; /* no */ 177 + } 178 + 179 + /* extract name */ 180 + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); 181 + 182 + /* extract prefix */ 183 + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? 
split - 1 : 0)); 184 + 185 + return 1; /* success */ 186 +} 187 + 188 + 189 +/* 190 +** When using an extension header we still need to put something 191 +** reasonable in the name and prefix fields. This is probably as 192 +** good as it gets. 193 +*/ 194 +static void approximate_split_path( 195 + const char *zName, /* path */ 196 + int nName, /* path length */ 197 + char *pName, /* name field */ 198 + char *pPrefix, /* prefix field */ 199 + int bHeader /* is this a 'x' type tar header? */ 200 +){ 201 + int split; 202 + 203 + /* if this is a Pax Interchange header prepend "PaxHeader/" 204 + ** so we can tell files apart from metadata */ 205 + if( bHeader ){ 206 + int n; 207 + blob_reset(&tball.pax); 208 + blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName); 209 + zName = blob_buffer(&tball.pax); 210 + nName = blob_size(&tball.pax); 211 + } 212 + 213 + /* find the split position */ 214 + split = find_split_pos(zName, nName); 215 + 216 + /* extract a name, truncate if needed */ 217 + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); 218 + 219 + /* extract a prefix field, truncate when needed */ 220 + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); 221 +} 222 + 223 + 224 +/* 225 +** add a Pax Interchange header to the scratch buffer 226 +** 227 +** format: <length> <key>=<value>\n 228 +** the tricky part is that each header contains its own 229 +** size in decimal, counting that length. 230 +*/ 231 +static void add_pax_header( 232 + const char *zField, 233 + const char *zValue, 234 + int nValue 235 +){ 236 + /* calculate length without length field */ 237 + int blen = strlen(zField) + nValue + 3; 238 + /* calculate the length of the length field */ 239 + int next10 = 1; 240 + int n; 241 + for(n = blen; n > 0; ){ 242 + blen++; next10 *= 10; 243 + n /= 10; 244 + } 245 + /* adding the length extended the length field? 
*/ 246 + if(blen > next10){ 247 + blen++; 248 + } 249 + /* build the string */ 250 + blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); 251 + /* this _must_ be right */ 252 + if(blob_size(&tball.pax) != blen){ 253 + fossil_fatal("internal error: PAX tar header has bad length"); 254 + } 255 +} 256 + 257 + 258 +/* 259 +** set the header type, calculate the checksum and output 260 +** the header 261 +*/ 262 +static void cksum_and_write_header( 263 + char cType 264 +){ 265 + unsigned int cksum = 0; 266 + int i; 267 + memset(&tball.aHdr[148], ' ', 8); 268 + tball.aHdr[156] = cType; 269 + for(i=0; i<512; i++) cksum += tball.aHdr[i]; 270 + sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); 271 + tball.aHdr[155] = 0; 272 + gzip_step((char*)tball.aHdr, 512); 273 +} 274 + 53 275 54 276 /* 55 277 ** Build a header for a file or directory and write that header 56 278 ** into the growing tarball. 57 279 */ 58 280 static void tar_add_header( 59 281 const char *zName, /* Name of the object */ 60 282 int nName, /* Number of characters in zName */ 61 283 int iMode, /* Mode. 0644 or 0755 */ 62 284 unsigned int mTime, /* File modification time */ 63 285 int iSize, /* Size of the object in bytes */ 64 - int iType /* Type of object. 0==file. 5==directory */ 286 + char cType /* Type of object. '0'==file. 
'5'==directory */ 65 287 ){ 66 - unsigned int cksum = 0; 67 - int i; 68 - if( nName>100 ){ 69 - memcpy(&tball.aHdr[345], zName, nName-100); 70 - memcpy(tball.aHdr, &zName[nName-100], 100); 71 - memset(&tball.aHdr[245+nName], 0, 267-nName); 72 - }else{ 73 - memcpy(tball.aHdr, zName, nName); 74 - memset(&tball.aHdr[nName], 0, 100-nName); 75 - memset(&tball.aHdr[345], 0, 167); 76 - } 288 + /* set mode and modification time */ 77 289 sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); 78 - sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); 79 290 sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); 80 - memset(&tball.aHdr[148], ' ', 8); 81 - tball.aHdr[156] = iType + '0'; 82 - for(i=0; i<512; i++) cksum += tball.aHdr[i]; 83 - sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); 84 - tball.aHdr[154] = 0; 85 - gzip_step((char*)tball.aHdr, 512); 291 + 292 + /* see if we need to output a Pax Interchange Header */ 293 + if( !is_iso646_name(zName, nName) || 294 + !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){ 295 + int lastPage; 296 + /* add a file name for interoperability with older programs */ 297 + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1); 298 + 299 + /* generate the Pax Interchange path header */ 300 + blob_reset(&tball.pax); 301 + add_pax_header("path", zName, nName); 302 + 303 + /* set the header length, and write the header */ 304 + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", 305 + blob_size(&tball.pax)); 306 + cksum_and_write_header('x'); 307 + 308 + /* write the Pax Interchange data */ 309 + gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax)); 310 + lastPage = blob_size(&tball.pax) % 512; 311 + if( lastPage!=0 ){ 312 + gzip_step(tball.zSpaces, 512 - lastPage); 313 + } 314 + 315 + /* generate an approximate path for the regular header */ 316 + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0); 317 + } 318 + /* set the size */ 319 + 
sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); 320 + 321 + /* write the regular header */ 322 + cksum_and_write_header(cType); 86 323 } 324 + 87 325 88 326 /* 89 327 ** Recursively add an directory entry for the given file if those 90 328 ** directories have not previously been seen. 91 329 */ 92 330 static void tar_add_directory_of( 93 331 const char *zName, /* Name of directory including final "/" */ 94 332 int nName, /* Characters in zName */ 95 333 unsigned int mTime /* Modification time */ 96 334 ){ 97 335 int i; 98 336 for(i=nName-1; i>0 && zName[i]!='/'; i--){} 99 337 if( i<=0 ) return; 100 - if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; 338 + if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && 339 + memcmp(tball.zPrevDir, zName, i)==0 ) return; 101 340 db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); 102 341 if( sqlite3_changes(g.db)==0 ) return; 103 342 tar_add_directory_of(zName, i-1, mTime); 104 - tar_add_header(zName, i, 0755, mTime, 0, 5); 343 + tar_add_header(zName, i, 0755, mTime, 0, '5'); 344 + if( i >= tball.nPrevDirAlloc ){ 345 + int nsize = tball.nPrevDirAlloc * 2; 346 + if(i+1 > nsize) 347 + nsize = i+1; 348 + tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); 349 + tball.nPrevDirAlloc = nsize; 350 + } 105 351 memcpy(tball.zPrevDir, zName, i); 106 352 tball.zPrevDir[i] = 0; 107 353 } 354 + 108 355 109 356 /* 110 357 ** Add a single file to the growing tarball. 111 358 */ 112 359 static void tar_add_file( 113 360 const char *zName, /* Name of the file. nul-terminated */ 114 361 Blob *pContent, /* Content of the file */ ................................................................................ 
115 362 int isExe, /* True for executable files */ 116 363 unsigned int mTime /* Last modification time of the file */ 117 364 ){ 118 365 int nName = strlen(zName); 119 366 int n = blob_size(pContent); 120 367 int lastPage; 121 368 122 - if( nName>=250 ){ 123 - fossil_fatal("name too long for ustar format: \"%s\"", zName); 124 - } 369 + /* length check moved to tar_split_path */ 125 370 tar_add_directory_of(zName, nName, mTime); 126 - tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); 371 + tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); 127 372 if( n ){ 128 373 gzip_step(blob_buffer(pContent), n); 129 374 lastPage = n % 512; 130 375 if( lastPage!=0 ){ 131 376 gzip_step(tball.zSpaces, 512 - lastPage); 132 377 } 133 378 } ................................................................................ 140 385 static void tar_finish(Blob *pOut){ 141 386 db_multi_exec("DROP TABLE dir"); 142 387 gzip_step(tball.zSpaces, 512); 143 388 gzip_step(tball.zSpaces, 512); 144 389 gzip_finish(pOut); 145 390 fossil_free(tball.aHdr); 146 391 tball.aHdr = 0; 392 + fossil_free(tball.zPrevDir); 393 + tball.zPrevDir = NULL; 394 + tball.nPrevDirAlloc = 0; 395 + blob_reset(&tball.pax); 147 396 } 148 397 149 398 150 399 /* 151 400 ** COMMAND: test-tarball 152 401 ** 153 402 ** Generate a GZIP-compresssed tarball in the file given by the first argument