summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Elijah Newren <newren@gmail.com>2020-12-13 08:04:08 +0000
committerLibravatar Junio C Hamano <gitster@pobox.com>2020-12-13 14:18:19 -0800
commit5b59c3db059d85306ebeb680c4d322a69ee29fde (patch)
treed2a388c7a2426f6cdc0b57fb9a4151db0d29e9f4
parentMerge branch 'en/strmap' into en/merge-ort-impl (diff)
downloadtgif-5b59c3db059d85306ebeb680c4d322a69ee29fde.tar.xz
merge-ort: setup basic internal data structures
Set up some basic internal data structures. The only carry-over from merge-recursive.c is call_depth, though needed_rename_limit will be added later. The central piece of data will definitely be the strmap "paths", which will map every relevant pathname under consideration to either a merged_info or a conflict_info. ("conflicted" is a strmap that is a subset of "paths".) merged_info contains all relevant information for a non-conflicted entry. conflict_info contains a merged_info, plus any additional information about a conflict such as the higher order stages involved and the names of the paths those came from (handy once renames get involved). If an entry remains conflicted, the merged_info portion of a conflict_info will later be filled with whatever version of the file should be placed in the working directory (e.g. an as-merged-as-possible variation that contains conflict markers). Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--merge-ort.c147
1 files changed, 147 insertions, 0 deletions
diff --git a/merge-ort.c b/merge-ort.c
index b487901d3e..3325c9c0a2 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -17,6 +17,153 @@
#include "cache.h"
#include "merge-ort.h"
+#include "strmap.h"
+
+struct merge_options_internal {
+ /*
+ * paths: primary data structure in all of merge ort.
+ *
+ * The keys of paths:
+ * * are full relative paths from the toplevel of the repository
+ * (e.g. "drivers/firmware/raspberrypi.c").
+ * * store all relevant paths in the repo, both directories and
+ * files (e.g. drivers, drivers/firmware would also be included)
+ * * these keys serve to intern all the path strings, which allows
+ * us to do pointer comparison on directory names instead of
+ * strcmp; we just have to be careful to use the interned strings.
+ *
+ * The values of paths:
+ * * either a pointer to a merged_info, or a conflict_info struct
+ * * merged_info contains all relevant information for a
+ * non-conflicted entry.
+ * * conflict_info contains a merged_info, plus any additional
+ * information about a conflict such as the higher order stages
+ * involved and the names of the paths those came from (handy
+ * once renames get involved).
+ * * a path may start "conflicted" (i.e. point to a conflict_info)
+ * and then a later step (e.g. three-way content merge) determines
+ * it can be cleanly merged, at which point it'll be marked clean
+ * and the algorithm will ignore any data outside the contained
+ * merged_info for that entry
+ * * If an entry remains conflicted, the merged_info portion of a
+ * conflict_info will later be filled with whatever version of
+ * the file should be placed in the working directory (e.g. an
+ * as-merged-as-possible variation that contains conflict markers).
+ */
+ struct strmap paths;
+
+ /*
+ * conflicted: a subset of keys->values from "paths"
+ *
+ * conflicted is basically an optimization between process_entries()
+ * and record_conflicted_index_entries(); the latter could loop over
+ * ALL the entries in paths AGAIN and look for the ones that are
+ * still conflicted, but since process_entries() has to loop over
+ * all of them, it saves the ones it couldn't resolve in this strmap
+ * so that record_conflicted_index_entries() can iterate just the
+ * relevant entries.
+ */
+ struct strmap conflicted;
+
+ /*
+ * current_dir_name: temporary var used in collect_merge_info_callback()
+ *
+ * Used to set merged_info.directory_name; see documentation for that
+ * variable and the requirements placed on that field.
+ */
+ const char *current_dir_name;
+
+ /* call_depth: recursion level counter for merging merge bases */
+ int call_depth;
+};
+
+struct version_info {
+ struct object_id oid; /* object id of one version of a path */
+ unsigned short mode; /* mode paired with oid (presumably the tree-entry file mode; confirm) */
+};
+
+struct merged_info {
+ /* If is_null, ignore result; otherwise result holds the oid & mode. */
+ struct version_info result;
+ unsigned is_null:1;
+
+ /*
+ * clean: whether the path in question is cleanly merged.
+ *
+ * See conflict_info.merged for more details.
+ */
+ unsigned clean:1;
+
+ /*
+ * basename_offset: offset of basename of path.
+ *
+ * Performance optimization to avoid recomputing offset of final '/'
+ * character in pathname (0 if no '/' in pathname).
+ */
+ size_t basename_offset;
+
+ /*
+ * directory_name: containing directory name.
+ *
+ * Note that we assume directory_name is constructed such that
+ * strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
+ * i.e. string equality is equivalent to pointer equality. For this
+ * to hold, we have to be careful setting directory_name.
+ */
+ const char *directory_name;
+};
+
+struct conflict_info {
+ /*
+ * merged: the version of the path that will be written to working tree
+ *
+ * WARNING: It is critical to check merged.clean and ensure it is 0
+ * before reading any conflict_info fields outside of merged.
+ * Allocated merged_info structs will always have clean set to 1.
+ * Allocated conflict_info structs will have merged.clean set to 0
+ * initially. The merged.clean field is how we know if it is safe
+ * to access other parts of conflict_info besides merged; if a
+ * conflict_info's merged.clean is changed to 1, the rest of the
+ * algorithm is not allowed to look at anything outside of the
+ * merged member anymore.
+ */
+ struct merged_info merged;
+
+ /* oids & modes from each of the three trees for this path */
+ struct version_info stages[3];
+
+ /* pathnames for each stage; may differ due to rename detection */
+ const char *pathnames[3];
+
+ /* Whether this path is/was involved in a directory/file conflict */
+ unsigned df_conflict:1;
+
+ /*
+ * For filemask and dirmask, the ith bit corresponds to whether the
+ * ith entry is a file (filemask) or a directory (dirmask). Thus,
+ * filemask & dirmask is always zero, and filemask | dirmask is at
+ * most 7 but can be less when a path does not appear as either a
+ * file or a directory on at least one side of history.
+ *
+ * Note that these masks are related to enum merge_side, as the ith
+ * entry corresponds to side i.
+ *
+ * These values come from a traverse_trees() call; more info may be
+ * found looking at tree-walk.h's struct traverse_info,
+ * particularly the documentation above the "fn" member (note that
+ * filemask = mask & ~dirmask from that documentation).
+ */
+ unsigned filemask:3;
+ unsigned dirmask:3;
+
+ /*
+ * Optimization to track which stages match, to avoid the need to
+ * recompute it in multiple steps. Either 0 or at least 2 bits are
+ * set; if at least 2 bits are set, their corresponding stages match.
+ */
+ unsigned match_mask:3;
+};
+
void merge_switch_to_result(struct merge_options *opt,
struct tree *head,
struct merge_result *result,