Index: src/tar.c ================================================================== --- src/tar.c +++ src/tar.c @@ -27,31 +27,253 @@ */ static struct tarball_t { unsigned char *aHdr; /* Space for building headers */ char *zSpaces; /* Spaces for padding */ char *zPrevDir; /* Name of directory for previous entry */ + int nPrevDirAlloc; /* size of zPrevDir */ + Blob pax; /* PAX data */ } tball; + + +/* +** field lengths of 'ustar' name and prefix fields. +*/ +#define USTAR_NAME_LEN 100 +#define USTAR_PREFIX_LEN 155 + /* ** Begin the process of generating a tarball. ** ** Initialize the GZIP compressor and the table of directory names. */ static void tar_begin(void){ assert( tball.aHdr==0 ); - tball.aHdr = fossil_malloc(512+512+256); - memset(tball.aHdr, 0, 512+512+256); + tball.aHdr = fossil_malloc(512+512); + memset(tball.aHdr, 0, 512+512); tball.zSpaces = (char*)&tball.aHdr[512]; - tball.zPrevDir = (char*)&tball.zSpaces[512]; + /* zPrevDir init */ + tball.zPrevDir = NULL; + tball.nPrevDirAlloc = 0; + /* scratch buffer init */ + blob_zero(&tball.pax); + memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ - memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ + memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ + memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ + memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ gzip_begin(); db_multi_exec( "CREATE TEMP TABLE dir(name UNIQUE);" ); } + + +/* +** verify that all characters in 'zName' are in the +** ISO646 (=ASCII) character set. 
+*/ +static int is_iso646_name( + const char *zName, /* file path */ + int nName /* path length */ +){ + int i; + for(i = 0; i < nName; i++){ + unsigned char c = (unsigned char)zName[i]; + if( c>0x7e ) return 0; + } + return 1; +} + + +/* +** copy string pSrc into pDest, truncating or padding with 0 if necessary +*/ +static void padded_copy( + char *pDest, + int nDest, + const char *pSrc, + int nSrc +){ + if(nSrc >= nDest){ + memcpy(pDest, pSrc, nDest); + }else{ + memcpy(pDest, pSrc, nSrc); + memset(&pDest[nSrc], 0, nDest - nSrc); + } +} + + + +/****************************************************************************** +** +** The 'tar' format has evolved over time. Initially the name was stored +** in a 100 byte null-terminated field 'name'. File path names were +** limited to 99 bytes. +** +** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing +** for up to 255 characters to be stored. The full file path is formed by +** concatenating the field 'prefix', a slash, and the field 'name'. This +** gives some measure of compatibility with programs that only understand +** the oldest format. +** +** The latest Posix extension is called the 'pax Interchange Format'. +** It removes all the limitations of the previous two formats by allowing +** the storage of arbitrary-length attributes in a separate object that looks +** like a file to programs that do not understand this extension. So the +** contents of the 'name' and 'prefix' fields should contain values that allow +** versions of tar that do not understand this extension to still do +** something useful. 
+** +******************************************************************************/ + +/* +** The position we use to split a file path into the 'name' and 'prefix' +** fields needs to meet the following criteria: +** +** - not at the beginning or end of the string +** - the position must contain a slash +** - no more than 100 characters follow the slash +** - no more than 155 characters precede it +** +** The routine 'find_split_pos' finds a split position. It will meet the +** criteria listed above if such a position exists. If no such +** position exists it generates one that is useful for generating the +** values used for backward compatibility. +*/ +static int find_split_pos( + const char *zName, /* file path */ + int nName /* path length */ +){ + int i, split = 0; + /* only search if the string needs splitting */ + if(nName > USTAR_NAME_LEN){ + for(i = 1; i+1 < nName; i++) + if(zName[i] == '/'){ + split = i+1; + /* if the split position is within USTAR_NAME_LEN bytes from + * the end we can quit */ + if(nName - split <= USTAR_NAME_LEN) break; + } + } + return split; +} + + +/* +** attempt to split the file name path to meet 'ustar' header +** criteria. +*/ +static int tar_split_path( + const char *zName, /* path */ + int nName, /* path length */ + char *pName, /* name field */ + char *pPrefix /* prefix field */ +){ + int split = find_split_pos(zName, nName); + /* check whether both pieces fit */ + if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){ + return 0; /* no */ + } + + /* extract name */ + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); + + /* extract prefix */ + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0)); + + return 1; /* success */ +} + + +/* +** When using an extension header we still need to put something +** reasonable in the name and prefix fields. This is probably as +** good as it gets. 
+*/ +static void approximate_split_path( + const char *zName, /* path */ + int nName, /* path length */ + char *pName, /* name field */ + char *pPrefix, /* prefix field */ + int bHeader /* is this a 'x' type tar header? */ +){ + int split; + + /* if this is a Pax Interchange header prepend "PaxHeader/" + ** so we can tell files apart from metadata */ + if( bHeader ){ + int n; + blob_reset(&tball.pax); + blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName); + zName = blob_buffer(&tball.pax); + nName = blob_size(&tball.pax); + } + + /* find the split position */ + split = find_split_pos(zName, nName); + + /* extract a name, truncate if needed */ + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); + + /* extract a prefix field, truncate when needed */ + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); +} + + +/* +** add a Pax Interchange header to the scratch buffer +** +** format: <length> <key>=<value>\n +** the tricky part is that each header contains its own +** size in decimal, counting that length. +*/ +static void add_pax_header( + const char *zField, + const char *zValue, + int nValue +){ + /* calculate length without length field */ + int blen = strlen(zField) + nValue + 3; + /* calculate the length of the length field */ + int next10 = 1; + int n; + for(n = blen; n > 0; ){ + blen++; next10 *= 10; + n /= 10; + } + /* did adding the digits push blen to one more digit (e.g. 9+1 => 10)? 
*/ + if(blen >= next10){ + blen++; + } + /* build the string */ + blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); + /* this _must_ be right */ + if(blob_size(&tball.pax) != blen){ + fossil_fatal("internal error: PAX tar header has bad length"); + } +} + + +/* +** set the header type, calculate the checksum and output +** the header +*/ +static void cksum_and_write_header( + char cType +){ + unsigned int cksum = 0; + int i; + memset(&tball.aHdr[148], ' ', 8); + tball.aHdr[156] = cType; + for(i=0; i<512; i++) cksum += tball.aHdr[i]; + sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); + tball.aHdr[155] = 0; + gzip_step((char*)tball.aHdr, 512); +} + /* ** Build a header for a file or directory and write that header ** into the growing tarball. */ @@ -59,33 +281,49 @@ const char *zName, /* Name of the object */ int nName, /* Number of characters in zName */ int iMode, /* Mode. 0644 or 0755 */ unsigned int mTime, /* File modification time */ int iSize, /* Size of the object in bytes */ - int iType /* Type of object. 0==file. 5==directory */ + char cType /* Type of object. '0'==file. 
'5'==directory */ ){ - unsigned int cksum = 0; - int i; - if( nName>100 ){ - memcpy(&tball.aHdr[345], zName, nName-100); - memcpy(tball.aHdr, &zName[nName-100], 100); - memset(&tball.aHdr[245+nName], 0, 267-nName); - }else{ - memcpy(tball.aHdr, zName, nName); - memset(&tball.aHdr[nName], 0, 100-nName); - memset(&tball.aHdr[345], 0, 167); - } + /* set mode and modification time */ sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); - sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); - memset(&tball.aHdr[148], ' ', 8); - tball.aHdr[156] = iType + '0'; - for(i=0; i<512; i++) cksum += tball.aHdr[i]; - sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); - tball.aHdr[154] = 0; - gzip_step((char*)tball.aHdr, 512); + + /* see if we need to output a Pax Interchange Header */ + if( !is_iso646_name(zName, nName) || + !tar_split_path(zName, nName, (char*)tball.aHdr, (char*)&tball.aHdr[345]) ){ + int lastPage; + /* add a file name for interoperability with older programs */ + approximate_split_path(zName, nName, (char*)tball.aHdr, (char*)&tball.aHdr[345], 1); + + /* generate the Pax Interchange path header */ + blob_reset(&tball.pax); + add_pax_header("path", zName, nName); + + /* set the header length, and write the header */ + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", + blob_size(&tball.pax)); + cksum_and_write_header('x'); + + /* write the Pax Interchange data */ + gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax)); + lastPage = blob_size(&tball.pax) % 512; + if( lastPage!=0 ){ + gzip_step(tball.zSpaces, 512 - lastPage); + } + + /* generate an approximate path for the regular header */ + approximate_split_path(zName, nName, (char*)tball.aHdr, (char*)&tball.aHdr[345], 0); + } + /* set the size */ + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); + + /* write the regular header */ + cksum_and_write_header(cType); } + /* ** Recursively add an directory entry for the given file if those ** 
directories have not previously been seen. */ @@ -95,18 +333,27 @@ unsigned int mTime /* Modification time */ ){ int i; for(i=nName-1; i>0 && zName[i]!='/'; i--){} if( i<=0 ) return; - if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; + if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && + memcmp(tball.zPrevDir, zName, i)==0 ) return; db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); if( sqlite3_changes(g.db)==0 ) return; tar_add_directory_of(zName, i-1, mTime); - tar_add_header(zName, i, 0755, mTime, 0, 5); + tar_add_header(zName, i, 0755, mTime, 0, '5'); + if( i >= tball.nPrevDirAlloc ){ + int nsize = tball.nPrevDirAlloc * 2; + if(i+1 > nsize) + nsize = i+1; + tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); + tball.nPrevDirAlloc = nsize; + } memcpy(tball.zPrevDir, zName, i); tball.zPrevDir[i] = 0; } + /* ** Add a single file to the growing tarball. */ static void tar_add_file( @@ -117,15 +364,13 @@ ){ int nName = strlen(zName); int n = blob_size(pContent); int lastPage; - if( nName>=250 ){ - fossil_fatal("name too long for ustar format: \"%s\"", zName); - } + /* length check moved to tar_split_path */ tar_add_directory_of(zName, nName, mTime); - tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); + tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); if( n ){ gzip_step(blob_buffer(pContent), n); lastPage = n % 512; if( lastPage!=0 ){ gzip_step(tball.zSpaces, 512 - lastPage); @@ -142,10 +387,14 @@ gzip_step(tball.zSpaces, 512); gzip_step(tball.zSpaces, 512); gzip_finish(pOut); fossil_free(tball.aHdr); tball.aHdr = 0; + fossil_free(tball.zPrevDir); + tball.zPrevDir = NULL; + tball.nPrevDirAlloc = 0; + blob_reset(&tball.pax); } /* ** COMMAND: test-tarball