Fossil

Changes On Branch reconstruct-sha3
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch reconstruct-sha3 Excluding Merge-Ins

This is equivalent to a diff from 5280c1ab to 8d1ed47c

2019-02-05
15:43
Enhance the "reconstruct" command so that it sets the correct hash policy for artifacts read from disk. ... (check-in: 93bb3231 user: drh tags: trunk)
2019-01-29
14:29
Add a test command to infer the hash policy from the length of path names on reconstruct (disabled by preprocessor directive). ... (Closed-Leaf check-in: 8d1ed47c user: florian tags: reconstruct-sha3)
14:09
Calculate hash lengths with skipped directory slashes. ... (check-in: c47adb91 user: florian tags: reconstruct-sha3)
2019-01-28
17:54
Expanded the section on --with-openssl=none in www/build.wiki to explain why adding that option is a bad idea, what to do instead, and to point to the newly expanded OpenSSL discussion in www/ssl.wiki for more information. ... (check-in: 4f810279 user: wyoung tags: trunk)
10:12
Enhance the 'reconstruct' command to set the correct hash policy (SHA1 or SHA3-256) for artifacts read from disk, inferred from the length of the path name. Also enhance the 'deconstruct' and 'reconstruct' commands with an option to ensure the artifact with RID=1 is a valid manifest. See the wiki page linked to this branch for more information and tests. ... (check-in: 62a00bc7 user: florian tags: reconstruct-sha3)
2019-01-27
19:32
Update the built-in SQLite to the latest 3.27.0 alpha. Updates to the change log. ... (check-in: 5280c1ab user: drh tags: trunk)
19:19
Change the "reparent" command so that it only works within an open checkout. Documentation improvements, especially added documentation about the FOSSIL_SECURITY_LEVEL environment variable. ... (check-in: d168be0c user: drh tags: trunk)

Changes to src/rebuild.c.

180
181
182
183
184
185
186


187

188
189
190
191
192
193
194
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */

static char *zFNameFormat;  /* Format string for filenames on deconstruct */


static int prefixLength;    /* Length of directory prefix for deconstruct */



/*
** Draw the percent-complete message.
** The input is actually the permill complete.
*/
static void percent_complete(int permill){







>
>

>







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */

static char *zFNameFormat;  /* Format string for filenames on deconstruct */
static int cchFNamePrefix;  /* Length of directory prefix in zFNameFormat */
static char *zDestDir;      /* Destination directory on deconstruct */
static int prefixLength;    /* Length of directory prefix for deconstruct */
static int fKeepRid1;       /* Flag to preserve RID=1 on de- and reconstruct */


/*
** Draw the percent-complete message.
** The input is actually the permill complete.
*/
static void percent_complete(int permill){
272
273
274
275
276
277
278











279
280
281
282
283
284
285
      manifest_crosslink(rid, pUse, MC_NONE);
    }else{
      /* We are doing "fossil deconstruct" */
      char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
      char *zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                            zUuid, zUuid+prefixLength);
      blob_write_to_file(pUse,zFile);











      free(zFile);
      free(zUuid);
      blob_reset(pUse);
    }
    assert( blob_is_reset(pUse) );
    rebuild_step_done(rid);








>
>
>
>
>
>
>
>
>
>
>







275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
      manifest_crosslink(rid, pUse, MC_NONE);
    }else{
      /* We are doing "fossil deconstruct" */
      char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
      char *zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                            zUuid, zUuid+prefixLength);
      blob_write_to_file(pUse,zFile);
      if( rid==1 && fKeepRid1!=0 ){
        char *zFnDotRid1 = mprintf("%s/.rid1", zDestDir);
        char *zFnRid1 = zFile + cchFNamePrefix + 1; /* Skip directory slash */
        Blob bFileContents = empty_blob;
        blob_appendf(&bFileContents,
          "# The file holding the artifact with RID=1\n"
          "%s\n", zFnRid1);
        blob_write_to_file(&bFileContents, zFnDotRid1);
        blob_reset(&bFileContents);
        free(zFnDotRid1);
      }
      free(zFile);
      free(zUuid);
      blob_reset(pUse);
    }
    assert( blob_is_reset(pUse) );
    rebuild_step_done(rid);

929
930
931
932
933
934
935



936




































937
938
939
940
941
942
943
void recon_read_dir(char *zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;








































  zUnicodePath = fossil_utf8_to_path(zPath, 1);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;








>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
void recon_read_dir(char *zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;
  static int recursionLevel = 0;  /* Bookkeeping about the recursion level */
  static char *zFnRid1 = 0;       /* The file holding the artifact with RID=1 */
  static int cchPathInitial = 0;  /* The length of zPath on first recursion */

  recursionLevel++;
  if( recursionLevel==1 ){
    cchPathInitial = strlen(zPath);
    if( fKeepRid1!=0 ){
      char *zFnDotRid1 = mprintf("%s/.rid1", zPath);
      Blob bFileContents;
      if( blob_read_from_file(&bFileContents, zFnDotRid1, ExtFILE)!=-1 ){
        Blob line, value;
        while( blob_line(&bFileContents, &line)>0 ){
          if( blob_token(&line, &value)==0 ) continue;  /* Empty line */
          if( blob_buffer(&value)[0]=='#' ) continue;   /* Comment */
          blob_trim(&value);
          zFnRid1 = mprintf("%s/%s", zPath, blob_str(&value));
          break;
        }
        blob_reset(&bFileContents);
        if( zFnRid1 ){
          if( blob_read_from_file(&aContent, zFnRid1, ExtFILE)==-1 ){
            fossil_fatal("some unknown error occurred while reading \"%s\"",
                         zFnRid1);
          }else{
            recon_set_hash_policy(0, zFnRid1);
            content_put(&aContent);
            recon_restore_hash_policy();
            blob_reset(&aContent);
            fossil_print("\r%d", ++nFileRead);
            fflush(stdout);
          }
        }else{
          fossil_fatal("an error occurred while reading or parsing \"%s\"",
                       zFnDotRid1);
        }
      }
      free(zFnDotRid1);
    }
  }
  zUnicodePath = fossil_utf8_to_path(zPath, 1);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

951
952
953
954
955
956
957
958
959
960
961
962
963
964

965

966
967
968
969
970
971
972
973
974
975
976
977
978


979





































































































980
981
982
983
984
985
986
987
988
989
990




991
992
993
994

995
996
997
998
999
1000
1001
      if( (pEntry->d_type==DT_UNKNOWN || pEntry->d_type==DT_LNK)
          ? (file_isdir(zSubpath, ExtFILE)==1) : (pEntry->d_type==DT_DIR) )
#else
      if( file_isdir(zSubpath, ExtFILE)==1 )
#endif
      {
        recon_read_dir(zSubpath);
      }else{
        blob_init(&path, 0, 0);
        blob_appendf(&path, "%s", zSubpath);
        if( blob_read_from_file(&aContent, blob_str(&path), ExtFILE)==-1 ){
          fossil_fatal("some unknown error occurred while reading \"%s\"",
                       blob_str(&path));
        }

        content_put(&aContent);

        blob_reset(&path);
        blob_reset(&aContent);
        fossil_print("\r%d", ++nFileRead);
        fflush(stdout);
      }
      free(zSubpath);
    }
    closedir(d);
  }else {
    fossil_fatal("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
  fossil_path_free(zUnicodePath);


}






































































































/*
** COMMAND: reconstruct*
**
** Usage: %fossil reconstruct FILENAME DIRECTORY
**
** This command studies the artifacts (files) in DIRECTORY and
** reconstructs the fossil record from them. It places the new
** fossil repository in FILENAME. Subdirectories are read, files
** with leading '.' in the filename are ignored.
**




** See also: deconstruct, rebuild
*/
void reconstruct_cmd(void) {
  char *zPassword;

  if( g.argc!=4 ){
    usage("FILENAME DIRECTORY");
  }
  if( file_isdir(g.argv[3], ExtFILE)!=1 ){
    fossil_print("\"%s\" is not a directory\n\n", g.argv[3]);
    usage("FILENAME DIRECTORY");
  }







|






>

>













>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|






>
>
>
>




>







1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
      if( (pEntry->d_type==DT_UNKNOWN || pEntry->d_type==DT_LNK)
          ? (file_isdir(zSubpath, ExtFILE)==1) : (pEntry->d_type==DT_DIR) )
#else
      if( file_isdir(zSubpath, ExtFILE)==1 )
#endif
      {
        recon_read_dir(zSubpath);
      }else if( fossil_strcmp(zSubpath, zFnRid1)!=0 ){
        blob_init(&path, 0, 0);
        blob_appendf(&path, "%s", zSubpath);
        if( blob_read_from_file(&aContent, blob_str(&path), ExtFILE)==-1 ){
          fossil_fatal("some unknown error occurred while reading \"%s\"",
                       blob_str(&path));
        }
        recon_set_hash_policy(cchPathInitial, blob_str(&path));
        content_put(&aContent);
        recon_restore_hash_policy();
        blob_reset(&path);
        blob_reset(&aContent);
        fossil_print("\r%d", ++nFileRead);
        fflush(stdout);
      }
      free(zSubpath);
    }
    closedir(d);
  }else {
    fossil_fatal("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
  fossil_path_free(zUnicodePath);
  if( recursionLevel==1 && zFnRid1!=0 ) free(zFnRid1);
  recursionLevel--;
}

/*
** Helper functions called from recon_read_dir() to set and restore the correct
** hash policy for an artifact read from disk, inferred from the length of the
** path name.
*/
static int saved_eHashPolicy = -1;

/*
** Infer a hash policy from the length of an artifact's path name and, if
** one can be inferred, install it in g.eHashPolicy after remembering the
** policy that was in effect in saved_eHashPolicy.
**
** Only the part of zUuidAsFilePath that follows the first cchPathPrefix
** bytes is measured, and '/' characters in that part are not counted.
** A measured length of at least HNAME_LEN_K256 selects HPOLICY_SHA3, a
** length of at least HNAME_LEN_SHA1 selects HPOLICY_SHA1, and anything
** shorter leaves g.eHashPolicy and saved_eHashPolicy untouched.
**
** The routine is a no-op when zUuidAsFilePath is NULL, when cchPathPrefix
** is negative, or when the prefix covers the whole string.
*/
void recon_set_hash_policy(
  const int cchPathPrefix,    /* Directory prefix length for zUuidAsFilePath */
  const char *zUuidAsFilePath /* Relative, well-formed, from recon_read_dir() */
){
  int cchUuidAsFilePath;      /* Total length of zUuidAsFilePath */
  const char *zHashPart;      /* Cursor over the bytes following the prefix */
  int cchHashPart = 0;        /* Bytes following the prefix, '/' not counted */
  int new_eHashPolicy = -1;   /* Policy to install, or -1 if none inferred */
  assert( HNAME_COUNT==2 ); /* Review function if new hashes are implemented. */
  if( zUuidAsFilePath==0 ) return;
  /* A negative prefix would move the cursor before the start of the string. */
  if( cchPathPrefix<0 ) return;
  cchUuidAsFilePath = (int)strlen(zUuidAsFilePath);
  /* Also covers the empty string, because cchPathPrefix>=0 here. */
  if( cchPathPrefix>=cchUuidAsFilePath ) return;
  for( zHashPart = zUuidAsFilePath + cchPathPrefix; *zHashPart; zHashPart++ ){
    if( *zHashPart!='/' ) cchHashPart++;
  }
  if( cchHashPart>=HNAME_LEN_K256 ){
    new_eHashPolicy = HPOLICY_SHA3;
  }else if( cchHashPart>=HNAME_LEN_SHA1 ){
    new_eHashPolicy = HPOLICY_SHA1;
  }
  if( new_eHashPolicy!=-1 ){
    /* Keep the first saved policy if a previous override has not been
    ** restored yet; otherwise the original policy would be lost. */
    if( saved_eHashPolicy==-1 ) saved_eHashPolicy = g.eHashPolicy;
    g.eHashPolicy = new_eHashPolicy;
  }
}

/*
** Put back the hash policy remembered in saved_eHashPolicy, if any, and
** mark the saved slot as empty (-1) again.  Does nothing when no policy
** is currently saved, so it is safe to call after a
** recon_set_hash_policy() call that did not change g.eHashPolicy.
*/
void recon_restore_hash_policy(void){
  if( saved_eHashPolicy!=-1 ){
    g.eHashPolicy = saved_eHashPolicy;
    saved_eHashPolicy = -1;
  }
}

#if 0
/*
** COMMAND: test-hash-from-path*
**
** Usage: %fossil test-hash-from-path ?OPTIONS? DESTINATION UUID
**
** Generate a sample path name from DESTINATION and UUID, as the `deconstruct'
** command would do.  Then try to guess the hash policy from the path name, as
** the `reconstruct' command would do.
**
** No files or directories will be created.
**
** Options:
**   -L|--prefixlength N     Set the length of the names of the DESTINATION
**                           subdirectories to N.
*/
void test_hash_from_path_cmd(void) {
  char *zDest;                            /* DESTINATION argument */
  char *zUuid;                            /* UUID argument */
  char *zFile;                            /* Generated sample path name */
  const char *zHashPolicy = "unknown";    /* Reported if nothing is inferred */
  const char *zPrefixOpt = find_option("prefixlength","L",1);
  int iPrefixLength = 2;                  /* Default subdirectory name length */

  /* Accept exactly one decimal digit, the same rule deconstruct_cmd() uses,
  ** so that input such as "abc" or "5x" is rejected instead of being
  ** silently converted. */
  if( zPrefixOpt ){
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      iPrefixLength = (int)(zPrefixOpt[0]-'0');
    }else{
      fossil_fatal("N(%s) is not a valid prefix length!",zPrefixOpt);
    }
  }
  if( g.argc!=4 ){
    usage ("?OPTIONS? DESTINATION UUID");
  }
  zDest = g.argv[2];
  zUuid = g.argv[3];
  /* zUuid+iPrefixLength below must stay inside the UUID string. */
  if( (int)strlen(zUuid)<iPrefixLength ){
    fossil_fatal("UUID \"%s\" is shorter than the prefix length %d",
                 zUuid, iPrefixLength);
  }
  if( iPrefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDest,iPrefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDest);
  }
  cchFNamePrefix = strlen(zDest);
  zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                  zUuid, zUuid+iPrefixLength);
  recon_set_hash_policy(cchFNamePrefix,zFile);
  /* saved_eHashPolicy is only set when a policy was inferred and installed,
  ** so use it to tell whether hpolicy_name() reflects a guessed policy. */
  if( saved_eHashPolicy!=-1 ){
    zHashPolicy = hpolicy_name();
  }
  recon_restore_hash_policy();
  fossil_print(
    "\nPath Name:   %s"
    "\nHash Policy: %s\n",
    zFile,zHashPolicy);
  free(zFile);
  /* Release the shared file-scope format state and clear it again. */
  free(zFNameFormat);
  zFNameFormat = 0;
  cchFNamePrefix = 0;
}
#endif

/*
** COMMAND: reconstruct*
**
** Usage: %fossil reconstruct ?OPTIONS? FILENAME DIRECTORY
**
** This command studies the artifacts (files) in DIRECTORY and
** reconstructs the fossil record from them. It places the new
** fossil repository in FILENAME. Subdirectories are read, files
** with leading '.' in the filename are ignored.
**
** Options:
**    -K|--keep-rid1    Read the filename of the artifact with
**                      RID=1 from the file .rid1 in DIRECTORY.
**
** See also: deconstruct, rebuild
*/
void reconstruct_cmd(void) {
  char *zPassword;
  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
  if( g.argc!=4 ){
    usage("FILENAME DIRECTORY");
  }
  if( file_isdir(g.argv[3], ExtFILE)!=1 ){
    fossil_print("\"%s\" is not a directory\n\n", g.argv[3]);
    usage("FILENAME DIRECTORY");
  }
1039
1040
1041
1042
1043
1044
1045
1046


1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058

1059
1060
1061
1062
1063
1064
1065
** writes all artifacts to the file system. The DESTINATION directory
** will be populated with subdirectories AA and files AA/BBBBBBBBB.., where
** AABBBBBBBBB.. is the 40+ character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
**
** Options:
**   -R|--repository REPOSITORY  deconstruct given REPOSITORY


**   -L|--prefixlength N         set the length of the names of the DESTINATION
**                               subdirectories to N
**   --private                   Include private artifacts.
**
** See also: rebuild, reconstruct
*/
void deconstruct_cmd(void){
  const char *zDestDir;
  const char *zPrefixOpt;
  Stmt        s;
  int privateFlag;


  /* get and check prefix length argument and build format string */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');







|
>
>
|
|





<




>







1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
** writes all artifacts to the file system. The DESTINATION directory
** will be populated with subdirectories AA and files AA/BBBBBBBBB.., where
** AABBBBBBBBB.. is the 40+ character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
**
** Options:
**   -R|--repository REPOSITORY  Deconstruct given REPOSITORY.
**   -K|--keep-rid1              Save the filename of the artifact with RID=1 to
**                               the file .rid1 in the DESTINATION directory.
**   -L|--prefixlength N         Set the length of the names of the DESTINATION
**                               subdirectories to N.
**   --private                   Include private artifacts.
**
** See also: rebuild, reconstruct
*/
void deconstruct_cmd(void){

  const char *zPrefixOpt;
  Stmt        s;
  int privateFlag;

  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
  /* get and check prefix length argument and build format string */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');
1090
1091
1092
1093
1094
1095
1096

1097
1098
1099
1100
1101
1102
1103
  */
#endif
  if( prefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDestDir,prefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDestDir);
  }


  bag_init(&bagDone);
  ttyOutput = 1;
  processCnt = 0;
  if (!g.fQuiet) {
    fossil_print("0 (0%%)...\r");
    fflush(stdout);







>







1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
  */
#endif
  if( prefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDestDir,prefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDestDir);
  }
  cchFNamePrefix = strlen(zDestDir);

  bag_init(&bagDone);
  ttyOutput = 1;
  processCnt = 0;
  if (!g.fQuiet) {
    fossil_print("0 (0%%)...\r");
    fflush(stdout);