From 1b1c9d2bb60555ecdd65c21a889b0deb56ba8221 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Sun, 21 Jun 2020 14:48:43 +0530 Subject: [PATCH 1/6] commit-graph: introduce metadata chunk, generation data chunk --- commit-graph.c | 337 +++++++++++++++++++++++++++++++++++++++++++++++-- commit-graph.h | 6 +- commit.h | 2 + 3 files changed, 334 insertions(+), 11 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index fdd1c4fa7c5451..4c38a42349c1a5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -42,7 +42,30 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */ #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */ #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ -#define MAX_NUM_CHUNKS 7 +#define GRAPH_CHUNKID_METADATA 0x4d455441 /* "META" */ +#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */ +#define MAX_NUM_CHUNKS 9 /* previous limit of 7, plus "META" and "GDAT" */ + + +/* + * V0 - Topological level (0, 0, 0, 0, 0) + * + * Metadata Chunk: + * V1 - Corrected Commit Date Offsets in CDAT (1, 0, 0, 1, X) + * + * Generation Data Chunk: + * V2 - Corrected Commit Date Offsets in CDAT, Corrected Date With + * Monotonic Offsets in GDAT (0, 1, 0, 1, X) + * V3 - Topological Levels in CDAT, Corrected Commit Date in GDAT (0, 1, 1, 0, 1) + * V4 - GENERATION_NUMBERS_MAX in CDAT, Corrected Commit Date in GDAT (0, 1, 1, 0, 0) + */ + +#define GRAPH_METADATA_CHUNK_ENABLED 0 + +#define GRAPH_GENERATION_DATA_CHUNK_ENABLED 1 +#define GENERATION_NUMBER_V3 1 +#define GENERATION_NUMBER_V5 0 +#define GENERATION_COMPUTE_TOPOLOGICAL_LEVEL 1 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -99,7 +122,7 @@ uint32_t commit_graph_position(const struct commit *c) return data ? 
data->graph_pos : COMMIT_NOT_FROM_GRAPH; } -uint32_t commit_graph_generation(const struct commit *c) +timestamp_t commit_graph_generation(const struct commit *c) { struct commit_graph_data *data = commit_graph_data_slab_peek(&commit_graph_data_slab, c); @@ -372,6 +395,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) graph->chunk_commit_data = data + chunk_offset; break; + case GRAPH_CHUNKID_METADATA: + if (graph->chunk_metadata) + chunk_repeated = 1; + else + graph->chunk_metadata = data + chunk_offset; + break; + + case GRAPH_CHUNKID_GENERATION_DATA: + if (graph->chunk_generation_data) + chunk_repeated = 1; + else + graph->chunk_generation_data = data + chunk_offset; + break; + case GRAPH_CHUNKID_EXTRAEDGES: if (graph->chunk_extra_edges) chunk_repeated = 1; @@ -410,6 +447,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8); } break; + } if (chunk_repeated) { @@ -733,16 +771,42 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, const unsigned char *commit_data; struct commit_graph_data *graph_data; uint32_t lex_index; + timestamp_t generation; while (pos < g->num_commits_in_base) g = g->base_graph; lex_index = pos - g->num_commits_in_base; - commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; graph_data = commit_graph_data_at(item); graph_data->graph_pos = pos; - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + if (g->chunk_generation_data) + /* + * Load corrected commit date (or corrected commit date + * with monotonic offset) from GDAT + */ + generation = get_be64(g->chunk_generation_data + sizeof(timestamp_t) * lex_index); + else if (g->chunk_metadata) + { + /* Load corrected commit date offset from CDAT and add to commit date */ + uint64_t offset, date_low, date_high; + + commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; + offset = 
get_be32(commit_data + g->hash_len + 8) >> 2; + date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; + date_low = get_be32(commit_data + g->hash_len + 12); + + generation = offset + (date_high << 32 | date_low); + } + else + { + /* Load topological level from CDAT */ + commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; + generation = get_be32(commit_data + g->hash_len + 8) >> 2; + } + + graph_data->generation = generation; } static inline void set_commit_tree(struct commit *c, struct tree *t) @@ -760,7 +824,8 @@ static int fill_commit_in_graph(struct repository *r, struct commit_list **pptr; struct commit_graph_data *graph_data; const unsigned char *commit_data; - uint32_t lex_index; + uint32_t lex_index, offset; + timestamp_t generation; while (pos < g->num_commits_in_base) g = g->base_graph; @@ -786,7 +851,23 @@ static int fill_commit_in_graph(struct repository *r, date_low = get_be32(commit_data + g->hash_len + 12); item->date = (timestamp_t)((date_high << 32) | date_low); - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + if (g->chunk_generation_data) + /* + * Load corrected commit date (or corrected commit date + * with monotonic offset) from GDAT + */ + generation = get_be64(g->chunk_generation_data + sizeof(timestamp_t) * lex_index); + else if (g->chunk_metadata) + /* Load corrected commit date offset from CDAT and add to commit date */ + { + offset = get_be32(commit_data + g->hash_len + 8) >> 2; + generation = offset + (date_high << 32 | date_low); /* same date reassembly as item->date above */ + } + else + /* Load topological level from CDAT */ + generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + graph_data->generation = generation; pptr = &item->parents; @@ -1081,7 +1162,24 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, else packedDate[0] = 0; - packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); + if (GRAPH_METADATA_CHUNK_ENABLED) + /* Copy corrected date offsets into CDAT */ + packedDate[0] |= 
htonl(commit_graph_data_at(*list)->generation << 2); + if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { + if (GENERATION_NUMBER_V5) + /* Copy corrected date offset into CDAT when using V5 */ + packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); + else if (GENERATION_NUMBER_V3) + { + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) + /* Copy topological levels if calculated when using V3 */ + packedDate[0] |= htonl(commit_graph_data_at(*list)->graph_pos << 2); + else + packedDate[0] |= htonl(GENERATION_NUMBER_INFINITY << 2); + } + } + else + packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -1090,6 +1188,34 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, } } +static void write_graph_chunk_metadata(struct write_commit_graph_context *ctx) +{ + ctx->progress_cnt += ctx->commits.nr; + display_progress(ctx->progress, ctx->progress_cnt); +} + +static void write_graph_chunk_generation_data(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + struct commit **list = ctx->commits.list; + struct commit **last = ctx->commits.list + ctx->commits.nr; + + while (list < last) { + timestamp_t cdate; + uint32_t packedDate[2]; + + /* Add corrected date offset to commit date */ + cdate = (*list)->date + commit_graph_data_at(*list)->generation; + + packedDate[0] = htonl(cdate >> 32); + packedDate[1] = htonl(cdate); + + display_progress(ctx->progress, ++ctx->progress_cnt); + hashwrite(f, packedDate, sizeof(timestamp_t)); + list++; + } +} + static void write_graph_chunk_extra_edges(struct hashfile *f, struct write_commit_graph_context *ctx) { @@ -1302,11 +1428,171 @@ static void close_reachable(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -static void compute_generation_numbers(struct write_commit_graph_context *ctx) +/* + * Store the corrected commit date in commit_graph_data->generation. 
+ * + * If GENERATION_COMPUTE_TOPOLOGICAL_LEVEL is set, calculate and store + * topological levels in commit_graph_data->graph_pos + */ +static void compute_corrected_commit_dates(struct write_commit_graph_context *ctx) +{ + int i; + struct commit_list *list = NULL; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Computing commit graph generation numbers"), + ctx->commits.nr); + + for (i = 0; i < ctx->commits.nr; i++) { + struct commit_graph_data *data; + uint32_t offset, level, max_level = 0; + + data = commit_graph_data_at(ctx->commits.list[i]); + offset = data->generation; + + display_progress(ctx->progress, i + 1); + + if (offset == GENERATION_NUMBER_INFINITY || + offset == GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(ctx->commits.list[i], &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + timestamp_t max_timestamp = current->date; + + for (parent = current->parents; parent; parent = parent->next) { + timestamp_t parent_timestamp; + data = commit_graph_data_at(parent->item); + offset = data->generation; + + if (offset != GENERATION_NUMBER_INFINITY && + offset != GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } + + parent_timestamp = parent->item->date + offset; + + if (max_timestamp < parent_timestamp) + max_timestamp = parent_timestamp + 1; + + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + level = data->graph_pos; + + if (max_level < level) { + max_level = level; + } + } + } + + if (all_parents_computed) { + data = commit_graph_data_at(current); + + data->generation = (max_timestamp - current->date) + 1; + + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + data->graph_pos = max_level + 1; + } + + pop_commit(&list); + + if (data->generation > GENERATION_NUMBER_MAX) + data->generation = GENERATION_NUMBER_MAX; + + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + if (data->graph_pos > 
GENERATION_NUMBER_MAX) + data->graph_pos = GENERATION_NUMBER_MAX; + } + } + } + } + + stop_progress(&ctx->progress); +} + +/* + * Store the corrected date offset in commit_graph_data->generation. + * + * For both metadata and generation data chunk approaches, write the + * corrected date offset into CDAT. + * + * For generation data chunk, write out corrected date (c->date + c->generation) + * into GDAT as well. + */ +static void compute_corrected_commit_date_offsets(struct write_commit_graph_context *ctx) { int i; struct commit_list *list = NULL; + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Computing commit graph generation numbers"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + uint32_t offset = commit_graph_data_at(ctx->commits.list[i])->generation; + + display_progress(ctx->progress, i + 1); + if (offset != GENERATION_NUMBER_INFINITY && + offset != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(ctx->commits.list[i], &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_offset = 0; + timestamp_t max_timestamp = current->date; + + for (parent = current->parents; parent; parent = parent->next) { + offset = commit_graph_data_at(parent->item)->generation; + + if (offset == GENERATION_NUMBER_INFINITY || + offset == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } else { + timestamp_t timestamp = parent->item->date + offset; + + if (offset > max_offset) + max_offset = offset; + + if (timestamp > max_timestamp) + max_timestamp = timestamp + 1; + } + } + + if (all_parents_computed) { + struct commit_graph_data *data = commit_graph_data_at(current); + + data->generation = (max_timestamp - current->date) + 1; + pop_commit(&list); + + if (data->generation < max_offset + 1) + data->generation = max_offset + 1; + + if (data->generation > GENERATION_NUMBER_MAX) + 
data->generation = GENERATION_NUMBER_MAX; + } + } + } + stop_progress(&ctx->progress); +} + +/* + * Store topological level in commit_graph_data->generation + */ +static void compute_generation_numbers(struct write_commit_graph_context *ctx) +{ + int i, max_gen = 0; + struct commit_list *list = NULL; + if (ctx->report_progress) ctx->progress = start_delayed_progress( _("Computing commit graph generation numbers"), @@ -1347,6 +1633,9 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) if (data->generation > GENERATION_NUMBER_MAX) data->generation = GENERATION_NUMBER_MAX; + + if (data->generation > max_gen) + max_gen = data->generation; } } } @@ -1663,6 +1952,14 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; + if (GRAPH_METADATA_CHUNK_ENABLED) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_METADATA; + num_chunks++; + } + if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_GENERATION_DATA; + num_chunks++; + } if (ctx->num_extra_edges) { chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; num_chunks++; @@ -1686,6 +1983,19 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; num_chunks = 3; + if (GRAPH_METADATA_CHUNK_ENABLED) { + /* + * Existence of "META" in chunk lookup is enough for + * proof of concept + */ + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks]; + num_chunks++; + } + if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + sizeof(timestamp_t) * ctx->commits.nr; + num_chunks++; + } if (ctx->num_extra_edges) { chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + 4 * ctx->num_extra_edges; @@ -1735,6 +2045,10 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) 
write_graph_chunk_fanout(f, ctx); write_graph_chunk_oids(f, hashsz, ctx); write_graph_chunk_data(f, hashsz, ctx); + if (GRAPH_METADATA_CHUNK_ENABLED) + write_graph_chunk_metadata(ctx); + if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) + write_graph_chunk_generation_data(f, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); if (ctx->changed_paths) { @@ -2179,7 +2493,12 @@ int write_commit_graph(struct object_directory *odb, } else ctx->num_commit_graphs_after = 1; - compute_generation_numbers(ctx); + if (GENERATION_NUMBER_V3) + compute_corrected_commit_dates(ctx); + else if (GENERATION_NUMBER_V5) + compute_corrected_commit_date_offsets(ctx); + else + compute_generation_numbers(ctx); if (ctx->changed_paths) compute_bloom_filters(ctx); diff --git a/commit-graph.h b/commit-graph.h index 28f89cdf3e5775..88aa69554b248d 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -67,6 +67,8 @@ struct commit_graph { const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; + const unsigned char *chunk_metadata; + const unsigned char *chunk_generation_data; const unsigned char *chunk_extra_edges; const unsigned char *chunk_base_graphs; const unsigned char *chunk_bloom_indexes; @@ -137,12 +139,12 @@ void disable_commit_graph(struct repository *r); struct commit_graph_data { uint32_t graph_pos; - uint32_t generation; + timestamp_t generation; }; /* * Commits should be parsed before accessing generation, graph positions. 
*/ -uint32_t commit_graph_generation(const struct commit *); +timestamp_t commit_graph_generation(const struct commit *); uint32_t commit_graph_position(const struct commit *); #endif diff --git a/commit.h b/commit.h index e90153890954a4..16ee039ba9ad0a 100644 --- a/commit.h +++ b/commit.h @@ -13,6 +13,8 @@ #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF #define GENERATION_NUMBER_INFINITY 0xFFFFFFFF #define GENERATION_NUMBER_MAX 0x3FFFFFFF +#define GENERATION_NUMBER_V2_MAX 0xFFFFFFFFFFFFFFFF /* 64-bits */ +#define GENERATION_NUMBER_V2_INFINITY GENERATION_NUMBER_V2_MAX #define GENERATION_NUMBER_ZERO 0 struct commit_list { From b52679d9bc4f8e97ef733c0e4cad2afbf7be3ada Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Sun, 28 Jun 2020 12:47:47 +0530 Subject: [PATCH 2/6] commit-graph: add trace2 to find the largest odate --- commit-graph.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 4c38a42349c1a5..b3f99d1391d70d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1527,8 +1527,11 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct static void compute_corrected_commit_date_offsets(struct write_commit_graph_context *ctx) { int i; + uint32_t max_odate = 0; struct commit_list *list = NULL; + trace2_region_enter("commit-graph", "compute_corrected_commit_date_offsets", ctx->r); + if (ctx->report_progress) ctx->progress = start_delayed_progress( _("Computing commit graph generation numbers"), @@ -1579,10 +1582,16 @@ static void compute_corrected_commit_date_offsets(struct write_commit_graph_cont if (data->generation > GENERATION_NUMBER_MAX) data->generation = GENERATION_NUMBER_MAX; + + if (max_odate < data->generation) + max_odate = data->generation; } } } stop_progress(&ctx->progress); + + trace2_data_intmax("commit-graph", ctx->r, "max_odate", max_odate); /* key named after the max_odate counter */ + trace2_region_leave("commit-graph", "compute_corrected_commit_date_offsets", ctx->r); } /* From b610788a2d8d3efd944e22830b95c9cd5ad69583 
Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Sun, 28 Jun 2020 13:19:32 +0530 Subject: [PATCH 3/6] commit-graph: use trace2 to track number of commits walked --- builtin/log.c | 2 ++ commit-reach.c | 27 +++++++++++++++++++++++---- revision.c | 20 ++++++++++++++++++++ revision.h | 1 + 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index d104d5c6889ba2..3afb72ff748d07 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -444,6 +444,8 @@ static int cmd_log_walk(struct rev_info *rev) rev->diffopt.flags.check_failed) { return 02; } + + log_topo_stats(rev); return diff_result_code(&rev->diffopt, 0); } diff --git a/commit-reach.c b/commit-reach.c index a7a0b8726310e1..3eb34aae3ae1dc 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -38,9 +38,9 @@ static struct commit_list *paint_down_to_common(struct repository *r, struct commit_list *result = NULL; int i; uint32_t last_gen = GENERATION_NUMBER_INFINITY; + uint32_t num_walked = 0; - if (!min_generation) - queue.compare = compare_commits_by_commit_date; + trace2_region_enter("commit-reach", "paint_down_to_common", r); one->object.flags |= PARENT1; if (!n) { @@ -60,6 +60,8 @@ static struct commit_list *paint_down_to_common(struct repository *r, int flags; uint32_t generation = commit_graph_generation(commit); + num_walked++; + if (min_generation && generation > last_gen) BUG("bad generation skip %8x > %8x at %s", generation, last_gen, @@ -92,6 +94,9 @@ static struct commit_list *paint_down_to_common(struct repository *r, } clear_prio_queue(&queue); + + trace2_data_intmax("commit-reach", r, "num_walked", num_walked); + trace2_region_leave("commit-reach", "paint_down_to_common", r); return result; } @@ -510,7 +515,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, { struct contains_stack contains_stack = { 0, 0, NULL }; enum contains_result result; - uint32_t cutoff = GENERATION_NUMBER_INFINITY; + uint32_t cutoff = GENERATION_NUMBER_INFINITY, 
num_walked = 0; const struct commit_list *p; for (p = want; p; p = p->next) { @@ -526,12 +531,15 @@ static enum contains_result contains_tag_algo(struct commit *candidate, if (result != CONTAINS_UNKNOWN) return result; + trace2_region_enter("commit-reach", "contains_tag_algo", the_repository); push_to_contains_stack(candidate, &contains_stack); while (contains_stack.nr) { struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; struct commit *commit = entry->commit; struct commit_list *parents = entry->parents; + num_walked++; + if (!parents) { *contains_cache_at(cache, commit) = CONTAINS_NO; contains_stack.nr--; @@ -554,7 +562,12 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want, cache, cutoff); + + result = contains_test(candidate, want, cache, cutoff); + + trace2_data_intmax("commit-reach", the_repository, "num_walked", num_walked); + trace2_region_leave("commit-reach", "contains_tag_algo", the_repository); /* closes the region entered before the walk */ + return result; } int commit_contains(struct ref_filter *filter, struct commit *commit, @@ -590,6 +603,7 @@ int can_all_from_reach_with_flag(struct object_array *from, int i; int nr_commits; int result = 1; + uint32_t num_walked = 0; ALLOC_ARRAY(list, from->nr); nr_commits = 0; @@ -622,6 +636,8 @@ int can_all_from_reach_with_flag(struct object_array *from, nr_commits++; } + trace2_region_enter("commit-reach", "can_all_from_reach_flag", the_repository); + QSORT(list, nr_commits, compare_commits_by_gen); for (i = 0; i < nr_commits; i++) { @@ -641,6 +657,8 @@ int can_all_from_reach_with_flag(struct object_array *from, continue; } + num_walked++; + for (parent = stack->item->parents; parent; parent = parent->next) { if (parent->item->object.flags & (with_flag | RESULT)) stack->item->object.flags |= RESULT; @@ -675,6 +693,8 @@ int can_all_from_reach_with_flag(struct object_array *from, for (i = 0; i < from->nr; i++) from->objects[i].item->flags &= ~assign_flag; + trace2_data_intmax("commit-reach", 
the_repository, "num_walked", num_walked); + trace2_region_leave("commit-reach", "can_all_from_reach_flag", the_repository); return result; } diff --git a/revision.c b/revision.c index 32be93f404b182..420049dd3a1fa3 100644 --- a/revision.c +++ b/revision.c @@ -3275,6 +3275,7 @@ struct topo_walk_info { struct prio_queue topo_queue; struct indegree_slab indegree; struct author_date_slab author_date; + uint32_t num_walked_explore, num_walked_indegree, num_walked_topo; }; static inline void test_flag_and_insert(struct prio_queue *q, struct commit *c, int flag) @@ -3282,6 +3283,7 @@ static inline void test_flag_and_insert(struct prio_queue *q, struct commit *c, if (c->object.flags & flag) return; + c->object.flags |= flag; prio_queue_put(q, c); } @@ -3295,6 +3297,8 @@ static void explore_walk_step(struct rev_info *revs) if (!c) return; + info->num_walked_explore++; + if (parse_commit_gently(c, 1) < 0) return; @@ -3333,6 +3337,8 @@ static void indegree_walk_step(struct rev_info *revs) if (!c) return; + info->num_walked_indegree++; + if (parse_commit_gently(c, 1) < 0) return; @@ -3455,6 +3461,8 @@ static struct commit *next_topo_commit(struct rev_info *revs) /* pop next off of topo_queue */ c = prio_queue_get(&info->topo_queue); + info->num_walked_topo++; + if (c) *(indegree_slab_at(&info->indegree, c)) = 0; @@ -4175,3 +4183,15 @@ void put_revision_mark(const struct rev_info *revs, const struct commit *commit) fputs(mark, stdout); putchar(' '); } + +void log_topo_stats(struct rev_info *revs) +{ + struct topo_walk_info *info = revs->topo_walk_info; + + if (!info) + return ; + + trace2_data_intmax("revision", the_repository, "num_walked_explore", info->num_walked_explore); + trace2_data_intmax("revision", the_repository, "num_walked_indegree", info->num_walked_indegree); + trace2_data_intmax("revision", the_repository, "num_walked_topo", info->num_walked_topo); +} diff --git a/revision.h b/revision.h index 93491b79d475ad..72397ac1403bc9 100644 --- a/revision.h +++ 
b/revision.h @@ -452,4 +452,5 @@ int rewrite_parents(struct rev_info *revs, */ struct commit_list *get_saved_parents(struct rev_info *revs, const struct commit *commit); +void log_topo_stats(struct rev_info *revs); #endif From 763cbf80f5e9f6f6161e9cfb7991d42f63926228 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 3 Jul 2020 07:49:04 +0530 Subject: [PATCH 4/6] commit-graph: trace the largest offset for corrected commit dates --- commit-graph.c | 62 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index b3f99d1391d70d..431df227b3e012 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -62,10 +62,10 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_METADATA_CHUNK_ENABLED 0 -#define GRAPH_GENERATION_DATA_CHUNK_ENABLED 1 -#define GENERATION_NUMBER_V3 1 +#define GRAPH_GENERATION_DATA_CHUNK_ENABLED 0 +#define GENERATION_NUMBER_V3 0 #define GENERATION_NUMBER_V5 0 -#define GENERATION_COMPUTE_TOPOLOGICAL_LEVEL 1 +#define GENERATION_COMPUTE_TOPOLOGICAL_LEVEL 0 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -1429,7 +1429,10 @@ static void close_reachable(struct write_commit_graph_context *ctx) } /* - * Store the corrected commit date in commit_graph_data->generation. + * CCD_3(C) = Date(C) + Offset(C) such that CCD_3(C) > CCD_3(P) for + * all parents P of C. + * + * Store the Offset(C) in commit_graph_data->generation. 
* * If GENERATION_COMPUTE_TOPOLOGICAL_LEVEL is set, calculate and store * topological levels in commit_graph_data->graph_pos @@ -1438,6 +1441,9 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct { int i; struct commit_list *list = NULL; + uint32_t max_odate = 0; + + trace2_region_enter("commit-graph", "compute_corrected_commit_dates", ctx->r); if (ctx->report_progress) ctx->progress = start_delayed_progress( @@ -1445,19 +1451,17 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct ctx->commits.nr); for (i = 0; i < ctx->commits.nr; i++) { - struct commit_graph_data *data; - uint32_t offset, level, max_level = 0; - - data = commit_graph_data_at(ctx->commits.list[i]); - offset = data->generation; + uint32_t level, max_level = 0; + struct commit_graph_data *data = commit_graph_data_at(ctx->commits.list[i]); display_progress(ctx->progress, i + 1); - if (offset == GENERATION_NUMBER_INFINITY || - offset == GENERATION_NUMBER_ZERO) + if (data->generation != GENERATION_NUMBER_INFINITY && + data->generation != GENERATION_NUMBER_ZERO) continue; commit_list_insert(ctx->commits.list[i], &list); + while (list) { struct commit *current = list->item; struct commit_list *parent; @@ -1467,25 +1471,24 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct for (parent = current->parents; parent; parent = parent->next) { timestamp_t parent_timestamp; data = commit_graph_data_at(parent->item); - offset = data->generation; - if (offset != GENERATION_NUMBER_INFINITY && - offset != GENERATION_NUMBER_ZERO) { + if (data->generation == GENERATION_NUMBER_INFINITY || + data->generation == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); break; - } - - parent_timestamp = parent->item->date + offset; + } else { + parent_timestamp = parent->item->date + data->generation; - if (max_timestamp < parent_timestamp) - max_timestamp = parent_timestamp + 1; + if 
(parent_timestamp > max_timestamp) + max_timestamp = parent_timestamp + 1; - if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { - level = data->graph_pos; + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + level = data->graph_pos; - if (max_level < level) { - max_level = level; + if (max_level < level) { + max_level = level; + } } } } @@ -1493,7 +1496,7 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct if (all_parents_computed) { data = commit_graph_data_at(current); - data->generation = (max_timestamp - current->date) + 1; + data->generation = (uint32_t) (max_timestamp - current->date) + 1; if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { data->graph_pos = max_level + 1; @@ -1504,6 +1507,9 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct if (data->generation > GENERATION_NUMBER_MAX) data->generation = GENERATION_NUMBER_MAX; + if (data->generation > max_odate) + max_odate = data->generation; + if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { if (data->graph_pos > GENERATION_NUMBER_MAX) data->graph_pos = GENERATION_NUMBER_MAX; @@ -1512,11 +1518,17 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct } } + trace2_data_intmax("commit-graph", ctx->r, "max_odate3", max_odate); + trace2_region_leave("commit-graph", "compute_corrected_commit_dates", ctx->r); + stop_progress(&ctx->progress); } /* - * Store the corrected date offset in commit_graph_data->generation. + * CCD_5(C) = Date(C) + Offset(C) such that for all parents P of C, + * CCD_5(C) > CCD_5(P) and Offset(C) > Offset(P) + * + * Store Offset(C) in commit_graph_data->generation. * * For both metadata and generation data chunk approaches, write the * corrected date offset into CDAT. 
From d4d60cc26e11a9e5e56bcd7fb2f0175eb4ad1fc8 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 3 Jul 2020 08:05:33 +0530 Subject: [PATCH 5/6] commit-graph: use env variables --- commit-graph.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 431df227b3e012..e0c1fd0c39d259 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -60,13 +60,6 @@ void git_test_write_commit_graph_or_die(void) * V4 - GENERATION_NUMBERS_MAX in CDAT, Corrected Commit Date in GDAT (0, 1, 1, 0, 0) */ -#define GRAPH_METADATA_CHUNK_ENABLED 0 - -#define GRAPH_GENERATION_DATA_CHUNK_ENABLED 0 -#define GENERATION_NUMBER_V3 0 -#define GENERATION_NUMBER_V5 0 -#define GENERATION_COMPUTE_TOPOLOGICAL_LEVEL 0 - #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) #define GRAPH_VERSION_1 0x1 @@ -1162,16 +1155,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, else packedDate[0] = 0; - if (GRAPH_METADATA_CHUNK_ENABLED) + if (git_env_bool("GIT_METADATA_CHUNK_ENABLED", 0)) /* Copy corrected date offsets into CDAT */ packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); - if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { - if (GENERATION_NUMBER_V5) + else if (git_env_bool("GIT_GENERATION_DATA_CHUNK_ENABLED", 0)) { + if (git_env_bool("GIT_GENERATION_NUMBER_V5", 0)) /* Copy corrected date offset into CDAT when using V5 */ packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); - else if (GENERATION_NUMBER_V3) + else if (git_env_bool("GIT_GENERATION_NUMBER_V3", 0)) { - if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) + if (git_env_bool("GIT_COMPUTE_TOPOLOGICAL_LEVEL", 0)) /* Copy topological levels if calculated when using V3 */ packedDate[0] |= htonl(commit_graph_data_at(*list)->graph_pos << 2); else @@ -1434,7 +1427,7 @@ static void close_reachable(struct write_commit_graph_context *ctx) * * Store the Offset(C) in commit_graph_data->generation. 
* - * If GENERATION_COMPUTE_TOPOLOGICAL_LEVEL is set, calculate and store + * If $GIT_COMPUTE_TOPOLOGICAL_LEVEL is set, calculate and store * topological levels in commit_graph_data->graph_pos */ static void compute_corrected_commit_dates(struct write_commit_graph_context *ctx) @@ -1483,7 +1476,7 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct if (parent_timestamp > max_timestamp) max_timestamp = parent_timestamp + 1; - if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + if (git_env_bool("GIT_COMPUTE_TOPOLOGICAL_LEVEL", 0)) { level = data->graph_pos; if (max_level < level) { @@ -1498,7 +1491,7 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct data->generation = (uint32_t) (max_timestamp - current->date) + 1; - if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + if (git_env_bool("GIT_COMPUTE_TOPOLOGICAL_LEVEL", 0)) { data->graph_pos = max_level + 1; } @@ -1510,7 +1503,7 @@ static void compute_corrected_commit_dates(struct write_commit_graph_context *ct if (data->generation > max_odate) max_odate = data->generation; - if (GENERATION_COMPUTE_TOPOLOGICAL_LEVEL) { + if (git_env_bool("GIT_COMPUTE_TOPOLOGICAL_LEVEL", 0)) { if (data->graph_pos > GENERATION_NUMBER_MAX) data->graph_pos = GENERATION_NUMBER_MAX; } @@ -1973,11 +1966,11 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (GRAPH_METADATA_CHUNK_ENABLED) { + if (git_env_bool("GIT_METADATA_CHUNK_ENABLED", 0)) { chunk_ids[num_chunks] = GRAPH_CHUNKID_METADATA; num_chunks++; } - if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { + if (git_env_bool("GIT_GENERATION_DATA_CHUNK_ENABLED", 0)) { chunk_ids[num_chunks] = GRAPH_CHUNKID_GENERATION_DATA; num_chunks++; } @@ -2004,7 +1997,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; 
num_chunks = 3; - if (GRAPH_METADATA_CHUNK_ENABLED) { + if (git_env_bool("GIT_METADATA_CHUNK_ENABLED", 0)) { /* * Existence of "META" in chunk lookup is enough for * proof of concept @@ -2012,7 +2005,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks]; num_chunks++; } - if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) { + if (git_env_bool("GIT_GENERATION_DATA_CHUNK_ENABLED", 0)) { chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + sizeof(timestamp_t) * ctx->commits.nr; num_chunks++; @@ -2066,9 +2059,9 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_fanout(f, ctx); write_graph_chunk_oids(f, hashsz, ctx); write_graph_chunk_data(f, hashsz, ctx); - if (GRAPH_METADATA_CHUNK_ENABLED) + if (git_env_bool("GIT_METADATA_CHUNK_ENABLED", 0)) write_graph_chunk_metadata(ctx); - if (GRAPH_GENERATION_DATA_CHUNK_ENABLED) + if (git_env_bool("GIT_GENERATION_DATA_CHUNK_ENABLED", 0)) write_graph_chunk_generation_data(f, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); @@ -2514,9 +2507,9 @@ int write_commit_graph(struct object_directory *odb, } else ctx->num_commit_graphs_after = 1; - if (GENERATION_NUMBER_V3) + if (git_env_bool("GIT_GENERATION_NUMBER_V3", 0)) compute_corrected_commit_dates(ctx); - else if (GENERATION_NUMBER_V5) + else if (git_env_bool("GIT_GENERATION_NUMBER_V5", 0)) compute_corrected_commit_date_offsets(ctx); else compute_generation_numbers(ctx); From fa9f3f1dd6f25e60e34481d50b0e3310c0c46c8e Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 3 Jul 2020 13:30:17 +0530 Subject: [PATCH 6/6] Add helper scripts --- gen_perf.sh | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ read_time | 43 +++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100755 gen_perf.sh create mode 100755 read_time diff --git a/gen_perf.sh b/gen_perf.sh new file mode 100755 index 
00000000000000..c84695ffa26184
--- /dev/null
+++ b/gen_perf.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+#
+# Benchmark commit-graph writes and history walks under each generation
+# number scheme, selected through the GIT_* environment variables that
+# commit-graph.c reads.
+#
+# Usage: ./gen_perf.sh [<path-to-linux-repo>]
+#
+# Must be started from the directory holding the built ./git binary and
+# the ./read_time helper, since both are resolved against $PWD.
+
+git_dir=$PWD
+git_exec="$git_dir/git"
+linux_dir="${1:-/home/abhishek/github_repos/linux}"
+write_graph_command="commit-graph write --no-progress"
+commands=("log --topo-order -10000" "log --topo-order -100 v5.4 v5.5" "log --topo-order -100 v4.8 v4.9" "merge-base v5.4 v5.5" "merge-base v4.8 v4.9")
+
+# Scratch file for the stderr of "time". It lives outside the benchmarked
+# repository and is removed when the script exits.
+output_file=$(mktemp) || exit 1
+trap 'rm -f "$output_file"' EXIT
+
+# Print mean and standard deviation of the timings collected in $1 (parsed
+# by the read_time Python script), then empty $1 for the next measurement.
+read_time () {
+	"$git_dir/read_time" "$1"
+	: >"$1"
+}
+
+# Time the commit-graph write and every entry of $commands, printing one
+# summary per command. The command strings are expanded unquoted on purpose
+# so that they split into separate arguments.
+measure_time () {
+	# Write commit graph a few times to avoid cold start
+	for i in {1..3}
+	do
+		"$git_exec" $write_graph_command
+	done
+
+	# Note: Writing commit graph takes the longest, around 7 minutes in total.
+	# Remove if not needed.
+	for i in {1..3}
+	do
+		{ time "$git_exec" $write_graph_command; } 2>>"$output_file"
+	done
+
+	echo "$write_graph_command"
+
+	read_time "$output_file"
+
+	for command in "${commands[@]}"
+	do
+		echo "$command"
+		for i in {1..25}
+		do
+			{ time "$git_exec" $command; } >/dev/null 2>>"$output_file"
+		done
+
+		read_time "$output_file"
+	done
+}
+
+# Run the same workload once with trace2 performance output written to
+# $git_dir/$1.
+measure_perf () {
+	export GIT_TRACE2_PERF="$git_dir/$1"
+	"$git_exec" $write_graph_command
+
+	for command in "${commands[@]}"
+	do
+		"$git_exec" $command >/dev/null
+	done
+	unset GIT_TRACE2_PERF
+}
+
+# Never benchmark the wrong repository if the target is missing.
+cd "$linux_dir" || exit 1
+
+echo "## Master"
+
+measure_time
+measure_perf 'master.perf'
+
+echo "## Corrected Commit Dates with Monotonically Increasing offset, Metadata Chunk"
+export GIT_METADATA_CHUNK_ENABLED=1
+export GIT_GENERATION_NUMBER_V5=1
+
+measure_time
+measure_perf 'gen_v5.perf'
+
+unset GIT_METADATA_CHUNK_ENABLED
+unset GIT_GENERATION_NUMBER_V5
+
+echo "## Corrected Commit Dates, Dates into Generation Data Chunk"
+export GIT_GENERATION_DATA_CHUNK_ENABLED=1
+export GIT_GENERATION_NUMBER_V3=1
+
+measure_time
+measure_perf 'gen_v3.perf'
+
+unset GIT_GENERATION_DATA_CHUNK_ENABLED
+unset GIT_GENERATION_NUMBER_V3
+
+cd "$git_dir" || exit 1
diff --git a/read_time b/read_time
new file mode 100755
index 00000000000000..f194eddebf9e58
--- /dev/null
+++ b/read_time
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""Summarize the output of bash's "time" as mean and standard deviation.
+
+Usage: read_time <file>
+
+<file> holds the captured output of one or more "time" runs: whitespace
+separated "real", "user" and "sys" labels, each followed by a duration
+such as "0m1.234s". Tokens that are not part of such a pair are ignored.
+"""
+
+import re
+import statistics
+import sys
+
+# "<minutes>m<seconds>s"; the minutes field may have any number of digits.
+DURATION = re.compile(r"(\d+)m(\d+(?:\.\d+)?)s")
+
+
+def parse_duration(text):
+    """Return the duration in seconds, or None if text is not a duration."""
+    match = DURATION.fullmatch(text)
+    if match is None:
+        return None
+    # Minutes must be counted: dropping them under-reports any run of a
+    # minute or longer.
+    return int(match.group(1)) * 60 + float(match.group(2))
+
+
+def read_times(path):
+    """Return a dict mapping "real", "user" and "sys" to lists of seconds."""
+    times = {"real": [], "user": [], "sys": []}
+    with open(path) as f:
+        tokens = f.read().split()
+
+    # Pair every token with its successor and keep the label/duration pairs.
+    for label, value in zip(tokens, tokens[1:]):
+        seconds = parse_duration(value)
+        if label in times and seconds is not None:
+            times[label].append(seconds)
+    return times
+
+
+def main(argv):
+    if len(argv) != 2:
+        sys.exit("usage: %s <file>" % argv[0])
+
+    times = read_times(argv[1])
+    for title, label in (("Real", "real"), ("User", "user"), ("Sys", "sys")):
+        samples = times[label]
+        if not samples:
+            sys.exit("%s: no '%s' timings found" % (argv[1], label))
+        # pstdev is the population standard deviation: sqrt(sum((x - mean)^2) / n).
+        print("%s Time: %2.3f±%0.3f"
+              % (title, statistics.mean(samples), statistics.pstdev(samples)))
+
+
+if __name__ == "__main__":
+    main(sys.argv)