/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

char tdir[256];			/* scratch buffer for the directory being created */
char path[256];			/* directory that files are currently created in */
long stats = 0;			/* non-zero: print the size histogram at exit */

/*
 * Print the long description and the usage line to stdout.
 *
 * The wording is kept as originally written.  main() now rejects the
 * zero-ratio case the text warns about instead of crashing on it.
 */
void print_usage(void)
{
	fputs(" This program creates files in a tree of random depth and branching. "
	      "Files vary in size randomly according to a distribution function which seems to model real file systems. "
	      "This distribution function has a median size of median_file_size "
	      "(Median file size is hypothesized to be proportional to the average per file space wastage. "
	      "Notice how that implies that with a more efficient file system file size usage patterns "
	      "will in the long term move to a lower median file size), and a maximum size of max_file_size. "
	      "Directories vary in size according to the same distribution function but with separate parameters "
	      "to control median and maximum size for the number of files within them, "
	      "and the number of subdirectories within them. "
	      "This program prunes some empty subdirectories in a way that causes parents of leaf directories "
	      "to branch less than median_dir_branching. "
	      "To avoid having one large file distort the results such that you have to benchmark many times "
	      "set max_file_size to not more than bytes_to_consume/10. "
	      "If maximum/median is a small integer, then randomness is very poor. "
	      "This is a bug, Nikita, please find some clever way to fix it. "
	      "If it is 0, then the program crashes.... "
	      "For isolating performance consequences of design variations on particular file or directory size ranges, "
	      "try setting their median size and max_size to both equal the max size of the file size range you want to test. "
	      "To avoid having one large file distort the results set max_file_size to not more than bytes_to_consume/10. \n"
	      "Using a distribution function for the sizes of writes would be a natural next step in developing this program.\n\n",
	      stdout);
	fputs("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size "
	      "median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching "
	      "write_buffer_size /testfs_mount_point print_stats_flag\n\n",
	      stdout);
}

/* #define DEBUG */

char *write_buffer;		/* buffer from which we write */
int write_buffer_size = 0;	/* gets reset to an argv */
static int already_whined = 0;	/* keep out of disk space errors from being
				   endless by tracking whether we already
				   printed the message */
long bytes_to_consume = 0;	/* create files until their total number of
				   bytes exceeds this number, but not by more
				   than 1/10th */
long byte_total = 0;		/* bytes created so far */

/* statistics on sizes of files we attempted to create */
int fsz_0_100 = 0;
int fsz_100_1k = 0;
int fsz_1k_10k = 0;
int fsz_10k_100k = 0;
int fsz_100k_1m = 0;
int fsz_1m_10m = 0;
int fsz_10m_larger = 0;

/* Report a path that does not fit its fixed-size buffer and terminate. */
static void die_path_too_long(const char *dir, const char *name)
{
	fprintf(stderr, "reiser_fract_tree: path too long: %s/%s\n", dir, name);
	exit(1);
}

/* Write "dir/name" into dst (dst_size bytes); terminate if it does not fit. */
static void join_path(char *dst, size_t dst_size, const char *dir,
		      const char *name)
{
	int n = snprintf(dst, dst_size, "%s/%s", dir, name);

	if (n < 0 || (size_t)n >= dst_size)
		die_path_too_long(dir, name);
}

/*
 * Track a directory change in the global `path` (no chdir() is performed).
 * A name starting with ".." strips the last path component; any other name
 * is appended as a new component.
 */
void chngdir(char *name)
{
	size_t path_len, name_len;

	if (name[0] == '.' && name[1] == '.') {
		char *slash = strrchr(path, '/');

		/* a '/' at index 0 is deliberately left alone */
		if (slash != NULL && slash != path)
			*slash = 0;
		return;
	}

	path_len = strlen(path);
	name_len = strlen(name);
	if (path_len + 1 + name_len >= sizeof(path))
		die_path_too_long(path, name);
	path[path_len] = '/';
	memcpy(path + path_len + 1, name, name_len + 1);
}

/* Granularity multiplier determine_size() uses for a given max/median ratio. */
static int granularity_for(double ratio)
{
	return (ratio < 1024) ? 1024 * 1024 : 1;
}

/*
 * Number of distinct random steps determine_size() draws from for a given
 * max/median ratio: 0 when the ratio is too small (or not a number) to yield
 * even one step, saturated at INT_MAX when it would not fit an int.
 */
static int steps_for(double ratio)
{
	double scaled = granularity_for(ratio) * ratio;

	if (!(scaled >= 1))
		return 0;
	if (scaled >= (double)INT_MAX)
		return INT_MAX;
	return (int)scaled;
}

/*
 * This is the core statistical distribution function, and it is used for
 * file sizes, directory sizes, etc.
 *
 * median_size: median of the distribution (incremented by one internally to
 *              avoid dividing by zero).
 * max_size:    the maximal value of size.
 *
 * Consumes exactly one rand() value per call.  Returns 0 when max/median is
 * too small to draw from, and saturates at INT_MAX instead of performing an
 * out-of-range double-to-int conversion.
 *
 * NOTE(review): while the granularity reducer is active (ratio < 1024) the
 * random step can come within 1/(1024*1024) of the ratio, so results well
 * above max_size look possible.  That is the original behaviour and is kept
 * so that existing runs stay reproducible - confirm before changing.
 */
int determine_size(double median_size, double max_size)
{
	/* when x is half of its random range (max_size/median_size), result
	   is median_size */
	int nr_random, granularity_reducer, steps;
	double ratio, double_nr_random, size;

	/* it is a feature for us that this repeats identically every time it
	   is run, as otherwise meaningless variances would affect our results
	   and require us to use a higher number of benchmarks to achieve low
	   noise results. */
	nr_random = rand();
	median_size++;		/* avoids divide by zero errors */
	ratio = max_size / median_size;

	/* this code does poorly when max_size is not a lot more than median
	   size, and that needs fixing */

	/* THE NEXT 2 STATEMENTS ARE THE HEART OF THE PROGRAM */

	/* keep x below the value that when multiplied by median size on the
	   next line will equal max_size */
	/* the granularity_reducer is to handle the case where max_size is
	   near median_size: since '%' can only take ints, small values of
	   max_size/median_size are temporarily scaled up into large ints. */
	granularity_reducer = granularity_for(ratio);
	steps = steps_for(ratio);
	if (steps < 1)
		return 0;	/* '%' by zero would be undefined */

	nr_random %= steps;
	double_nr_random = ((double)nr_random) / granularity_reducer;
	size = median_size * (1 / (1 - double_nr_random / ratio) - 1);

	if (size >= (double)INT_MAX)
		return INT_MAX;
	return (int)size;
}

/* generate a unique filename: the decimal form of this_files_number */
void get_name_by_number(long this_files_number, char *str)
{
	sprintf(str, "%ld", this_files_number);
}

/*
 * Make a file of a specified size in the current `path`.
 *
 * The file is named after a running counter.  Existing files and
 * out-of-space conditions are skipped (the latter with a one-time message);
 * any other open/write/close failure terminates the program with the errno
 * of the failing call as exit status.
 */
void make_file(int size)
{
	static long this_files_number = 1;
	char name[128] = { 0 };
	char fname[256];
	int fd;
	int saved_errno;

	/* collect statistics about the size of files created, or more
	   precisely, the size of files that we will attempt to create. */
	if (size <= 100)
		fsz_0_100++;
	else if (size <= 1000)
		fsz_100_1k++;
	else if (size <= 10 * 1000)
		fsz_1k_10k++;
	else if (size <= 100 * 1000)
		fsz_10k_100k++;
	else if (size <= 1000 * 1000)
		fsz_100k_1m++;
	else if (size <= 10 * 1000 * 1000)
		fsz_1m_10m++;
	else
		fsz_10m_larger++;

	/* construct a name for the file */
	get_name_by_number(this_files_number++, name);
	join_path(fname, sizeof(fname), path, name);

	/* open the file, and deal with the various errors that can occur */
	fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777);
	if (fd == -1) {
		if (errno == ENOSPC) {
			if (!already_whined) {
				printf("reiser-2021A: out of disk (or inodes) space, will keep trying\n");
				already_whined = 1;	/* we continue other file creation in out of space conditions */
			}
			return;
		}
		/* it is sometimes useful to be able to run this program more
		   than once inside the same directory, and that means
		   skipping over filenames that already exist.  Thus we ignore
		   EEXIST, and pay attention to all else. */
		if (errno == EEXIST)
			return;	/* just skip existing file */
		saved_errno = errno;	/* perror() may overwrite errno */
		perror("open");
		exit(saved_errno);
	}

	/* each write() moves at most write_buffer_size - 1 bytes; with a
	   limit below 1 the loop below could never make progress */
	if (size > 0 && write_buffer_size - 1 < 1) {
		fprintf(stderr, "reiser_fract_tree: write_buffer_size must be at least 2\n");
		exit(1);
	}

	/* write to the file until it is the right size, handling the various
	   error conditions appropriately */
	while (size > 0) {
		int chunk = (size < write_buffer_size - 1) ? size : (write_buffer_size - 1);
		ssize_t written = write(fd, write_buffer, (size_t)chunk);

		if (written == -1) {
			if (errno == EINTR)
				continue;	/* interrupted before writing anything: retry */
			if (errno == ENOSPC) {
				if (!already_whined) {
					printf("reiser-2022: out of disk space, will keep trying\n");
					already_whined = 1;
				}
				close(fd);
				return;
			}
			saved_errno = errno;
			perror("write() failed");
			exit(saved_errno);
		}
		size -= (int)written;
	}

	/* close the file */
	if (close(fd)) {
		saved_errno = errno;
		perror("close() failed");
		exit(saved_errno);
	}
}

/* print the statistics on how many files were created of what size */
void print_stats(void)
{
	if (!stats)
		return;

	printf("\n");
	printf("File stats: Units are decimal (1k = 1000)\n");
	printf("files 0-100 : %i\n", fsz_0_100);
	printf("files 100-1K : %i\n", fsz_100_1k);
	printf("files 1K-10K : %i\n", fsz_1k_10k);
	printf("files 10K-100K : %i\n", fsz_10k_100k);
	printf("files 100K-1M : %i\n", fsz_100k_1m);
	printf("files 1M-10M : %i\n", fsz_1m_10m);
	printf("files 10M-larger : %i\n", fsz_10m_larger);
	printf("total bytes written : %ld\n", byte_total);
}

/*
 * Predict the number of files that will be created before bytes_to_consume
 * total length of files is reached.
 *
 * Seeds rand() with 1 before the prediction and again afterwards, so the
 * sizes drawn later repeat the sequence used here.  Returns 0 when
 * bytes_to_consume is not positive.
 */
long determine_nr_of_files(int median_file_size, double max_file_size,
			   long bytes_to_consume)
{
	long nr_of_files = 0;
	long predicted_bytes = 0;

	/* the next line is not necessary as 1 is the default, it is just
	   cautious coding */
	srand(1);
	while (predicted_bytes < bytes_to_consume) {
		predicted_bytes += determine_size(median_file_size, max_file_size);
		nr_of_files++;
	}
	/* reset the random number generator so that when we determine_size()
	   of the files later they will be created with the same "random"
	   sequence used in this calculation */
	srand(1);
#ifdef DEBUG
	printf("number of files is %d\n", (int)nr_of_files);
#endif /* DEBUG */
	fflush(NULL);
	return nr_of_files;
}

/*
 * Fill the current working directory (`path`) with nr_files_this_directory
 * files, drawing each size from determine_size() and adding it to byte_total.
 */
void fill_this_directory(long nr_files_this_directory, long median_file_size,
			 long maximum_size)
{
	long size;

#ifdef DEBUG
	printf("filling with %ld files, ", nr_files_this_directory);
#endif
	while (nr_files_this_directory--) {
		size = determine_size(median_file_size, maximum_size);
		byte_total += size;
		make_file(size);
	}
}

/*
 * Create directory `dirname` inside the current `path`.
 *
 * Returns 0 when the directory exists afterwards (created now, or already
 * present), or ENOSPC when there was no space for it; in that case the
 * caller is expected to stay in the current directory.  Any other mkdir()
 * failure terminates the program.
 *
 * dirname is left untouched.  It used to be overwritten from a second
 * private counter, which fell out of step with the caller's counter after
 * the first ENOSPC failure and sent the caller into a directory that had
 * never been created.
 */
int make_directory(char *dirname)
{
	int saved_errno;

	join_path(tdir, sizeof(tdir), path, dirname);
	if (mkdir(tdir, 0755) == -1) {
		saved_errno = errno;
		if (saved_errno == ENOSPC) {
			if (!already_whined) {
				printf("reiser-2021: out of disk space, ");
				already_whined = 1;
			}
			return saved_errno;
		}
		/* it is sometimes useful to be able to run this program more
		   than once inside the same directory, and that means
		   skipping over filenames that already exist.  Thus we ignore
		   EEXIST, and pay attention to all else. */
		if (saved_errno != EEXIST) {
			perror("mkdir");
			exit(saved_errno);
		}
	}
	return 0;
}

/*
 * Fills the directory named by `path` with files and subdirectories, enters
 * those subdirectories, and recurses upon itself.
 *
 * sizes_start .. sizes_end (both inclusive) is the portion of the directory
 * sizes array which corresponds to the directories composing this subtree:
 * *sizes_start is the number of files for this directory itself, and
 * sizes_end minus sizes_start is the number of directories below it.
 */
void do_subtree(long *sizes_start, long *sizes_end,
		long median_file_size, long maximum_file_size,
		long median_dir_branching, long max_dir_branching)
{
	static long this_directorys_number;
	char subtree_name[128];
	long *dirs_in_subtrees;
	long *sizes_index;
	long *p;
	long this_directory_branching;

	/* fill this directory with its number of files */
	fill_this_directory(*sizes_start, median_file_size, maximum_file_size);

	/* ok, now randomly assign directories (and their number of files)
	   among the subdirectories that will be created if at least one
	   directory is assigned to it */

	/* this will cause the random number sequence to not match the one
	   used in determine_nr_files() I need to accumulate my values in an
	   array beforehand.  I'll code that later. */

	/* worry about whether 0 or 1 is a problem value */
	this_directory_branching =
	    determine_size(median_dir_branching, max_dir_branching);
	if (this_directory_branching < LONG_MAX)
		this_directory_branching++;	/* always at least one slot */

	/* create an array holding the number of directories assigned to each
	   potential subdirectory */
	dirs_in_subtrees = calloc((size_t)this_directory_branching,
				  sizeof(*dirs_in_subtrees));
	if (dirs_in_subtrees == NULL) {
		perror("calloc");
		exit(1);
	}

	/* the +1 skips the entry consumed by fill_this_directory() above */
	for (sizes_index = sizes_start + 1; sizes_index <= sizes_end; sizes_index++)
		dirs_in_subtrees[rand() % this_directory_branching]++;

	sizes_index = sizes_start + 1;

	/* go through each potential subdirectory, and if at least one
	   directory has been assigned to it, create it and recurse */
	for (p = dirs_in_subtrees;
	     p < dirs_in_subtrees + this_directory_branching; p++) {
		if (*p) {
			int nocd;

			sprintf(subtree_name, "d%ld", this_directorys_number++);
			nocd = make_directory(subtree_name);
			/* if make_directory() fails (in out of space
			   situation), we continue creation process in same
			   dir */
			if (!nocd)
				chngdir(subtree_name);
#ifdef DEBUG
			/* comment this back in if the array logic has you going cross-eyed */
			/* printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */
#endif
			/* the minus one is because *p is the number of
			   elements and arrays start at 0 */
			do_subtree(sizes_index, sizes_index + (*p - 1),
				   median_file_size, maximum_file_size,
				   median_dir_branching, max_dir_branching);
			if (!nocd)
				chngdir("..");
		}
		sizes_index += *p;
	}

	free(dirs_in_subtrees);
}

/*
 * We have already determined that nr_files can fit in bytes_to_consume
 * space.  Fill the sizes array with the number of files to be in each
 * directory, and then call do_subtree to fill the tree with files and
 * directories.  Does nothing when nr_files is not positive.
 */
void make_fractal_tree(long median_file_size, long maximum_file_size,
		       long median_dir_nr_files, long max_dir_nr_files,
		       long median_dir_branching, long max_dir_branching,
		       long nr_files)
{
	long *sizes_start;
	long *sizes_index;
	long remaining_files = nr_files;

	if (nr_files <= 0)
		return;		/* no files: no directory sizes to lay out */

	/* collect together array of directory sizes for whole filesystem.
	   This cannot easily be done recursively without distorting the
	   directory sizes and making deeper directories smaller.  Send me the
	   code if you disagree.:-) */
	/* we almost certainly don't need this much space, but so what.... */
	sizes_start = calloc((size_t)nr_files, sizeof(*sizes_start));
	if (sizes_start == NULL) {
		perror("calloc");
		exit(1);
	}

	for (sizes_index = sizes_start; remaining_files > 0; sizes_index++) {
		*sizes_index = determine_size(median_dir_nr_files, max_dir_nr_files);
		/* we alloc space for nr_files, so we should avoid
		   number of files in directory = 0 -grev. */
		if (*sizes_index == 0)
			*sizes_index = 1;
		if (*sizes_index > remaining_files)
			*sizes_index = remaining_files;
#ifdef DEBUG
		printf("*sizes_index == %ld, ", *sizes_index);
#endif
		remaining_files -= *sizes_index;
	}

	srand(1);
	/* the loop ran at least once, so sizes_index - 1 is the last entry */
	do_subtree(sizes_start, sizes_index - 1, median_file_size,
		   maximum_file_size, median_dir_branching, max_dir_branching);

	free(sizes_start);
}

/*
 * Parse a whole-string decimal integer within [min, max]; print a diagnostic
 * naming the argument and terminate on anything else.
 */
static long parse_arg(const char *text, const char *what, long min, long max)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE ||
	    value < min || value > max) {
		fprintf(stderr,
			"reiser_fract_tree: invalid %s '%s' (expected an integer in [%ld, %ld])\n",
			what, text, min, max);
		exit(1);
	}
	return value;
}

/*
 * Terminate unless determine_size() has at least min_steps random steps to
 * draw from for this median/max pair.
 */
static void require_usable_range(const char *what, long median, long max,
				 int min_steps)
{
	if (steps_for((double)max / ((double)median + 1)) < min_steps) {
		fprintf(stderr,
			"reiser_fract_tree: maximum %s is too small relative to its median\n",
			what);
		exit(1);
	}
}

/*
 * Entry point.  Expects exactly ten arguments (see print_usage()), validates
 * them, predicts how many files fit into bytes_to_consume, and then builds
 * the tree below the given mount point.
 */
int main(int argc, char *argv[])
{
	/* initialized from argv[] */
	long median_file_size, median_dir_branching, median_dir_nr_files,
	    max_dir_nr_files, max_dir_branching, max_file_size;
	long nr_of_files = 0;	/* files to be created */

	if (argc != 11) {
		print_usage();
		exit(1);
	}

	/* the number of bytes that we desire this tree to consume.  It will
	   actually consume more, because the last file will overshoot by a
	   random amount, and because the directories and metadata will
	   consume space. */
	bytes_to_consume = parse_arg(argv[1], "bytes_to_consume", 0, LONG_MAX);
	median_file_size = parse_arg(argv[2], "median_file_size", 0, INT_MAX);
	max_file_size = parse_arg(argv[3], "max_file_size", 1, LONG_MAX);
	median_dir_nr_files = parse_arg(argv[4], "median_dir_nr_files", 0, INT_MAX);
	max_dir_nr_files = parse_arg(argv[5], "max_directory_nr_files", 1, LONG_MAX);
	median_dir_branching = parse_arg(argv[6], "median_dir_branching", 0, INT_MAX);
	max_dir_branching = parse_arg(argv[7], "max_dir_branching", 1, LONG_MAX);
	/* make_file() writes write_buffer_size - 1 bytes at a time */
	write_buffer_size = (int)parse_arg(argv[8], "write_buffer_size", 2, INT_MAX);
	/* the flag stays as lenient as atol(): anything non-numeric means 0 */
	stats = strtol(argv[10], NULL, 10);

	/* with a single step every file would be empty and the prediction
	   loop in determine_nr_of_files() could never finish */
	require_usable_range("file size", median_file_size, max_file_size, 2);
	require_usable_range("directory size", median_dir_nr_files, max_dir_nr_files, 1);
	require_usable_range("directory branching", median_dir_branching, max_dir_branching, 1);

	if (strlen(argv[9]) >= sizeof(path)) {
		fprintf(stderr, "reiser_fract_tree: path too long: %s\n", argv[9]);
		exit(1);
	}

	write_buffer = malloc((size_t)write_buffer_size);
	if (write_buffer == NULL) {
		perror("malloc");
		exit(1);
	}
	memset(write_buffer, 'a', (size_t)write_buffer_size);

	/* Figure out how many random files will fit into bytes_to_consume
	   bytes.  We depend on resetting rand() to get the same result
	   later. */
	nr_of_files = determine_nr_of_files(median_file_size, max_file_size,
					    bytes_to_consume);

	strcpy(path, argv[9]);	/* length checked above */
	/* best effort: the directory usually exists already; anything else
	   is reported but left for the first open()/mkdir() to decide */
	if (mkdir(path, 0755) == -1 && errno != EEXIST)
		perror("mkdir");

	make_fractal_tree(median_file_size, max_file_size,
			  median_dir_nr_files, max_dir_nr_files,
			  median_dir_branching, max_dir_branching,
			  nr_of_files);

	print_stats();
	if (stats)
		printf("\nreiser_fract_tree finished\n");

	free(write_buffer);
	return 0;
}