Changes On Branch ge-tarfix
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch ge-tarfix Excluding Merge-Ins

This is equivalent to a diff from ba15af450d to 02ce8b4a46

2011-07-25
11:21
Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames. check-in: a26940c22e user: drh tags: trunk
2011-07-24
19:47
Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs. Closed-Leaf check-in: 02ce8b4a46 user: drh tags: ge-tarfix
00:36
Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8. check-in: 2ef37b3b2a user: ge tags: ge-tarfix
2011-07-23
22:13
Fix a harmless compiler warning. check-in: ba15af450d user: drh tags: trunk
22:05
Improvements to the automatic background color chooser. Provide a setting which alters the algorithm to work with a light-color foreground. check-in: ce4a44d931 user: drh tags: trunk

Changes to src/tar.c.

    25     25   /*
    26     26   ** State information for the tarball builder.
    27     27   */
    28     28   static struct tarball_t {
    29     29     unsigned char *aHdr;      /* Space for building headers */
    30     30     char *zSpaces;            /* Spaces for padding */
    31     31     char *zPrevDir;           /* Name of directory for previous entry */
           32  +  int nPrevDirAlloc;        /* size of zPrevDir */
           33  +  Blob pax;                 /* PAX data */
    32     34   } tball;
           35  +
           36  +
           37  +/*
           38  +** field lengths of 'ustar' name and prefix fields.
           39  +*/
           40  +#define USTAR_NAME_LEN    100
           41  +#define USTAR_PREFIX_LEN  155
           42  +
    33     43   
    34     44   /*
    35     45   ** Begin the process of generating a tarball.
    36     46   **
    37     47   ** Initialize the GZIP compressor and the table of directory names.
    38     48   */
    39     49   static void tar_begin(void){
    40     50     assert( tball.aHdr==0 );
    41         -  tball.aHdr = fossil_malloc(512+512+256);
    42         -  memset(tball.aHdr, 0, 512+512+256);
           51  +  tball.aHdr = fossil_malloc(512+512);
           52  +  memset(tball.aHdr, 0, 512+512);
    43     53     tball.zSpaces = (char*)&tball.aHdr[512];
    44         -  tball.zPrevDir = (char*)&tball.zSpaces[512];
           54  +  /* zPrevDir init */
           55  +  tball.zPrevDir = NULL;
           56  +  tball.nPrevDirAlloc = 0;
           57  +  /* scratch buffer init */
           58  +  blob_zero(&tball.pax);
           59  +
    45     60     memcpy(&tball.aHdr[108], "0000000", 8);  /* Owner ID */
    46     61     memcpy(&tball.aHdr[116], "0000000", 8);  /* Group ID */
    47         -  memcpy(&tball.aHdr[257], "ustar  ", 7);  /* Format */
           62  +  memcpy(&tball.aHdr[257], "ustar\00000", 8);  /* POSIX.1 format */
           63  +  memcpy(&tball.aHdr[265], "nobody", 7);   /* Owner name */
           64  +  memcpy(&tball.aHdr[297], "nobody", 7);   /* Group name */
    48     65     gzip_begin();
    49     66     db_multi_exec(
    50     67       "CREATE TEMP TABLE dir(name UNIQUE);"
    51     68     );
    52     69   }
           70  +
           71  +
           72  +/*
           73  +** verify that all characters in 'zName' are in the
           74  +** ISO646 (=ASCII) character set.
           75  +*/
           76  +static int is_iso646_name(
           77  +  const char *zName,     /* file path */
           78  +  int nName              /* path length */
           79  +){
           80  +  int i;
           81  +  for(i = 0; i < nName; i++){
           82  +    unsigned char c = (unsigned char)zName[i];
           83  +    if( c>0x7e ) return 0;
           84  +  }
           85  +  return 1;
           86  +}
           87  +
           88  +
           89  +/*
           90  +**   copy string pSrc into pDest, truncating or padding with 0 if necessary
           91  +*/
           92  +static void padded_copy(
           93  +  char *pDest,
           94  +  int nDest,
           95  +  const char *pSrc,
           96  +  int nSrc
           97  +){
           98  +  if(nSrc >= nDest){
           99  +    memcpy(pDest, pSrc, nDest);
          100  +  }else{
          101  +    memcpy(pDest, pSrc, nSrc);
          102  +    memset(&pDest[nSrc], 0, nDest - nSrc);
          103  +  }
          104  +}
          105  +
          106  +
          107  +
          108  +/******************************************************************************
          109  +**
          110  +** The 'tar' format has evolved over time. Initially the name was stored
          111  +** in a 100 byte null-terminated field 'name'. File path names were
          112  +** limited to 99 bytes.
          113  +**
          114  +** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
          115  +** for up to 255 characters to be stored. The full file path is formed by
          116  +** concatenating the field 'prefix', a slash, and the field 'name'. This
          117  +** gives some measure of compatibility with programs that only understand
          118  +** the oldest format.
          119  +**
          120  +** The latest Posix extension is called the 'pax Interchange Format'.
          121  +** It removes all the limitations of the previous two formats by allowing
          122  +** the storage of arbitrary-length attributes in a separate object that looks
          123  +** like a file to programs that do not understand this extension. So the
          124  +** contents of the 'name' and 'prefix' fields should contain values that allow
          125  +** versions of tar that do not understand this extension to still do
          126  +** something useful.
          127  +**
          128  +******************************************************************************/
          129  +
          130  +/*
          131  +** The position we use to split a file path into the 'name' and 'prefix'
          132  +** fields needs to meet the following criteria:
          133  +**
          134  +**   - not at the beginning or end of the string
          135  +**   - the position must contain a slash
          136  +**   - no more than 100 characters follow the slash
          137  +**   - no more than 155 characters precede it
          138  +**
          139  +** The routine 'find_split_pos' finds a split position. It will meet the
          140  +** criteria listed above if such a position exists. If no such
          141  +** position exists it generates one that is useful for generating the
          142  +** values used for backward compatibility.
          143  +*/
          144  +static int find_split_pos(
          145  +  const char *zName,     /* file path */
          146  +  int nName              /* path length */
          147  +){
          148  +  int i, split = 0;
          149  +  /* only search if the string needs splitting */
          150  +  if(nName > USTAR_NAME_LEN){
          151  +    for(i = 1; i+1 < nName; i++)
          152  +      if(zName[i] == '/'){
          153  +        split = i+1;
          154  +        /* if the split position is within USTAR_NAME_LEN bytes from
          155  +         * the end we can quit */
          156  +        if(nName - split <= USTAR_NAME_LEN) break;
          157  +      }
          158  +  }
          159  +  return split;
          160  +}
          161  +
          162  +
          163  +/*
          164  +** attempt to split the file name path to meet 'ustar' header
          165  +** criteria.
          166  +*/
          167  +static int tar_split_path(
          168  +  const char *zName,     /* path */
          169  +  int nName,             /* path length */
          170  +  char *pName,           /* name field */
          171  +  char *pPrefix          /* prefix field */
          172  +){
          173  +  int split = find_split_pos(zName, nName);
          174  +  /* check whether both pieces fit */
          175  +  if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
          176  +    return 0; /* no */
          177  +  }
          178  +
          179  +  /* extract name */
          180  +  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
          181  +
          182  +  /* extract prefix */
          183  +  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
          184  +
          185  +  return 1; /* success */
          186  +}
          187  +
          188  +
          189  +/*
          190  +** When using an extension header we still need to put something
          191  +** reasonable in the name and prefix fields. This is probably as
          192  +** good as it gets.
          193  +*/
          194  +static void approximate_split_path(
          195  +  const char *zName,     /* path */
          196  +  int nName,             /* path length */
          197  +  char *pName,           /* name field */
          198  +  char *pPrefix,         /* prefix field */
          199  +  int bHeader            /* is this a 'x' type tar header? */
          200  +){
          201  +  int split;
          202  +
          203  +  /* if this is a Pax Interchange header prepend "PaxHeader/"
          204  +  ** so we can tell files apart from metadata */
          205  +  if( bHeader ){
          206  +    int n;
          207  +    blob_reset(&tball.pax);
          208  +    blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
          209  +    zName = blob_buffer(&tball.pax);
          210  +    nName = blob_size(&tball.pax);
          211  +  }
          212  +
          213  +  /* find the split position */
          214  +  split = find_split_pos(zName, nName);
          215  +
          216  +  /* extract a name, truncate if needed */
          217  +  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
          218  +
          219  +  /* extract a prefix field, truncate when needed */
          220  +  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
          221  +}
          222  +
          223  +
          224  +/*
          225  +** add a Pax Interchange header to the scratch buffer
          226  +**
          227  +** format: <length> <key>=<value>\n
          228  +** the tricky part is that each header contains its own
          229  +** size in decimal, counting that length.
          230  +*/
          231  +static void add_pax_header(
          232  +  const char *zField,
          233  +  const char *zValue,
          234  +  int nValue
          235  +){
          236  +  /* calculate length without length field */
          237  +  int blen = strlen(zField) + nValue + 3;
          238  +  /* calculate the length of the length field */
          239  +  int next10 = 1;
          240  +  int n;
          241  +  for(n = blen; n > 0; ){
          242  +    blen++; next10 *= 10;
          243  +    n /= 10;
          244  +  }
          245  +  /* adding the length extended the length field? */
          246  +  if(blen > next10){
          247  +    blen++;
          248  +  }
          249  +  /* build the string */
          250  +  blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
          251  +  /* this _must_ be right */
          252  +  if(blob_size(&tball.pax) != blen){
          253  +    fossil_fatal("internal error: PAX tar header has bad length");
          254  +  }
          255  +}
          256  +
          257  +
          258  +/*
          259  +** set the header type, calculate the checksum and output
          260  +** the header
          261  +*/
          262  +static void cksum_and_write_header(
          263  +  char cType
          264  +){
          265  +  unsigned int cksum = 0;
          266  +  int i;
          267  +  memset(&tball.aHdr[148], ' ', 8);
          268  +  tball.aHdr[156] = cType;
          269  +  for(i=0; i<512; i++) cksum += tball.aHdr[i];
          270  +  sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
          271  +  tball.aHdr[155] = 0;
          272  +  gzip_step((char*)tball.aHdr, 512);
          273  +}
          274  +
    53    275   
    54    276   /*
    55    277   ** Build a header for a file or directory and write that header
    56    278   ** into the growing tarball.
    57    279   */
    58    280   static void tar_add_header(
    59    281     const char *zName,     /* Name of the object */
    60    282     int nName,             /* Number of characters in zName */
    61    283     int iMode,             /* Mode.  0644 or 0755 */
    62    284     unsigned int mTime,    /* File modification time */
    63    285     int iSize,             /* Size of the object in bytes */
    64         -  int iType              /* Type of object.  0==file.  5==directory */
          286  +  char cType             /* Type of object.  '0'==file.  '5'==directory */
    65    287   ){
    66         -  unsigned int cksum = 0;
    67         -  int i;
    68         -  if( nName>100 ){
    69         -    memcpy(&tball.aHdr[345], zName, nName-100);
    70         -    memcpy(tball.aHdr, &zName[nName-100], 100);
    71         -    memset(&tball.aHdr[245+nName], 0, 267-nName);
    72         -  }else{
    73         -    memcpy(tball.aHdr, zName, nName);
    74         -    memset(&tball.aHdr[nName], 0, 100-nName);
    75         -    memset(&tball.aHdr[345], 0, 167);
    76         -  }
          288  +  /* set mode and modification time */
    77    289     sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
    78         -  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
    79    290     sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
    80         -  memset(&tball.aHdr[148], ' ', 8);
    81         -  tball.aHdr[156] = iType + '0';
    82         -  for(i=0; i<512; i++) cksum += tball.aHdr[i];
    83         -  sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
    84         -  tball.aHdr[154] = 0;
    85         -  gzip_step((char*)tball.aHdr, 512);
          291  +
          292  +  /* see if we need to output a Pax Interchange Header */
          293  +  if( !is_iso646_name(zName, nName) ||
          294  +            !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
          295  +    int lastPage;
          296  +    /* add a file name for interoperability with older programs */
          297  +    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
          298  +
          299  +    /* generate the Pax Interchange path header */
          300  +    blob_reset(&tball.pax);
          301  +    add_pax_header("path", zName, nName);
          302  +
          303  +    /* set the header length, and write the header */
          304  +    sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
          305  +                     blob_size(&tball.pax));
          306  +    cksum_and_write_header('x');
          307  +
          308  +    /* write the Pax Interchange data */
          309  +    gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
          310  +    lastPage = blob_size(&tball.pax) % 512;
          311  +    if( lastPage!=0 ){
          312  +      gzip_step(tball.zSpaces, 512 - lastPage);
          313  +    }
          314  +
          315  +    /* generate an approximate path for the regular header */
          316  +    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
          317  +  }
          318  +  /* set the size */
          319  +  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
          320  +
          321  +  /* write the regular header */
          322  +  cksum_and_write_header(cType);
    86    323   }
          324  +
    87    325   
    88    326   /*
    89    327   ** Recursively add a directory entry for the given file if those
    90    328   ** directories have not previously been seen.
    91    329   */
    92    330   static void tar_add_directory_of(
    93    331     const char *zName,      /* Name of directory including final "/" */
    94    332     int nName,              /* Characters in zName */
    95    333     unsigned int mTime      /* Modification time */
    96    334   ){
    97    335     int i;
    98    336     for(i=nName-1; i>0 && zName[i]!='/'; i--){}
    99    337     if( i<=0 ) return;
   100         -  if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
          338  +  if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
          339  +        memcmp(tball.zPrevDir, zName, i)==0 ) return;
   101    340     db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
   102    341     if( sqlite3_changes(g.db)==0 ) return;
   103    342     tar_add_directory_of(zName, i-1, mTime);
   104         -  tar_add_header(zName, i, 0755, mTime, 0, 5);
          343  +  tar_add_header(zName, i, 0755, mTime, 0, '5');
          344  +  if( i >= tball.nPrevDirAlloc ){
          345  +    int nsize = tball.nPrevDirAlloc * 2;
          346  +    if(i+1 > nsize)
          347  +      nsize = i+1;
          348  +    tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
          349  +    tball.nPrevDirAlloc = nsize;
          350  +  }
   105    351     memcpy(tball.zPrevDir, zName, i);
   106    352     tball.zPrevDir[i] = 0;
   107    353   }
          354  +
   108    355   
   109    356   /*
   110    357   ** Add a single file to the growing tarball.
   111    358   */
   112    359   static void tar_add_file(
   113    360     const char *zName,               /* Name of the file.  nul-terminated */
   114    361     Blob *pContent,                  /* Content of the file */
................................................................................
   115    362     int isExe,                       /* True for executable files */
   116    363     unsigned int mTime               /* Last modification time of the file */
   117    364   ){
   118    365     int nName = strlen(zName);
   119    366     int n = blob_size(pContent);
   120    367     int lastPage;
   121    368   
   122         -  if( nName>=250 ){
   123         -    fossil_fatal("name too long for ustar format: \"%s\"", zName);
   124         -  }
          369  +  /* length check moved to tar_split_path */
   125    370     tar_add_directory_of(zName, nName, mTime);
   126         -  tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
          371  +  tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
   127    372     if( n ){
   128    373       gzip_step(blob_buffer(pContent), n);
   129    374       lastPage = n % 512;
   130    375       if( lastPage!=0 ){
   131    376         gzip_step(tball.zSpaces, 512 - lastPage);
   132    377       }
   133    378     }
................................................................................
   140    385   static void tar_finish(Blob *pOut){
   141    386     db_multi_exec("DROP TABLE dir");
   142    387     gzip_step(tball.zSpaces, 512);
   143    388     gzip_step(tball.zSpaces, 512);
   144    389     gzip_finish(pOut);
   145    390     fossil_free(tball.aHdr);
   146    391     tball.aHdr = 0;
          392  +  fossil_free(tball.zPrevDir);
          393  +  tball.zPrevDir = NULL;
          394  +  tball.nPrevDirAlloc = 0;
          395  +  blob_reset(&tball.pax);
   147    396   }
   148    397   
   149    398   
   150    399   /*
   151    400   ** COMMAND: test-tarball
   152    401   **
   153    402   ** Generate a GZIP-compresssed tarball in the file given by the first argument