From 136f2e548a34f1f504b0f062f87ddf33e8d6e83b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:16:12 -0700 Subject: Make "struct tree" contain the pointer to the tree buffer This allows us to avoid allocating information for names etc, because we can just use the information from the tree buffer directly. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index d599fb5e1a..1e76d9cc11 100644 --- a/tree.c +++ b/tree.c @@ -3,6 +3,7 @@ #include "blob.h" #include "commit.h" #include "tag.h" +#include "tree-walk.h" #include const char *tree_type = "tree"; @@ -145,46 +146,45 @@ struct tree *lookup_tree(const unsigned char *sha1) int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) { - void *bufptr = buffer; + struct tree_desc desc; struct tree_entry_list **list_p; int n_refs = 0; if (item->object.parsed) return 0; item->object.parsed = 1; + item->buffer = buffer; + item->size = size; + + desc.buf = buffer; + desc.size = size; + list_p = &item->entries; - while (size) { - struct object *obj; + while (desc.size) { + unsigned mode; + const char *path; + const unsigned char *sha1; struct tree_entry_list *entry; - int len = 1+strlen(bufptr); - unsigned char *file_sha1 = bufptr + len; - char *path = strchr(bufptr, ' '); - unsigned int mode; - if (size < len + 20 || !path || - sscanf(bufptr, "%o", &mode) != 1) - return -1; + + sha1 = tree_entry_extract(&desc, &path, &mode); entry = xmalloc(sizeof(struct tree_entry_list)); - entry->name = strdup(path + 1); + entry->name = path; + entry->mode = mode; entry->directory = S_ISDIR(mode) != 0; entry->executable = (mode & S_IXUSR) != 0; entry->symlink = S_ISLNK(mode) != 0; - entry->zeropad = *(char *)bufptr == '0'; - entry->mode = mode; + entry->zeropad = *(const char *)(desc.buf) == '0'; entry->next = NULL; - bufptr += 
len + 20; - size -= len + 20; + update_tree_entry(&desc); if (entry->directory) { - entry->item.tree = lookup_tree(file_sha1); - obj = &entry->item.tree->object; + entry->item.tree = lookup_tree(sha1); } else { - entry->item.blob = lookup_blob(file_sha1); - obj = &entry->item.blob->object; + entry->item.blob = lookup_blob(sha1); } - if (obj) - n_refs++; + n_refs++; *list_p = entry; list_p = &entry->next; } @@ -206,7 +206,6 @@ int parse_tree(struct tree *item) char type[20]; void *buffer; unsigned long size; - int ret; if (item->object.parsed) return 0; @@ -219,9 +218,7 @@ int parse_tree(struct tree *item) return error("Object %s not a tree", sha1_to_hex(item->object.sha1)); } - ret = parse_tree_buffer(item, buffer, size); - free(buffer); - return ret; + return parse_tree_buffer(item, buffer, size); } struct tree *parse_tree_indirect(const unsigned char *sha1) -- cgit v1.2.3 From 3a7c352bd0ecac4b4c96c0995d61de9ef8d814f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:16:46 -0700 Subject: Make "tree_entry" have a SHA1 instead of a union of object pointers This is preparatory work for further cleanups, where we try to make tree_entry look more like the more efficient tree-walk descriptor. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index 1e76d9cc11..88c2219030 100644 --- a/tree.c +++ b/tree.c @@ -8,7 +8,7 @@ const char *tree_type = "tree"; -static int read_one_entry(unsigned char *sha1, const char *base, int baselen, const char *pathname, unsigned mode, int stage) +static int read_one_entry(const unsigned char *sha1, const char *base, int baselen, const char *pathname, unsigned mode, int stage) { int len; unsigned int size; @@ -89,7 +89,7 @@ int read_tree_recursive(struct tree *tree, current->mode, match)) continue; - switch (fn(current->item.any->sha1, base, baselen, + switch (fn(current->sha1, base, baselen, current->name, current->mode, stage)) { case 0: continue; @@ -107,7 +107,7 @@ int read_tree_recursive(struct tree *tree, memcpy(newbase, base, baselen); memcpy(newbase + baselen, current->name, pathlen); newbase[baselen + pathlen] = '/'; - retval = read_tree_recursive(current->item.tree, + retval = read_tree_recursive(lookup_tree(current->sha1), newbase, baselen + pathlen + 1, stage, match, fn); @@ -170,6 +170,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) entry = xmalloc(sizeof(struct tree_entry_list)); entry->name = path; + entry->sha1 = sha1; entry->mode = mode; entry->directory = S_ISDIR(mode) != 0; entry->executable = (mode & S_IXUSR) != 0; @@ -178,12 +179,6 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) entry->next = NULL; update_tree_entry(&desc); - - if (entry->directory) { - entry->item.tree = lookup_tree(sha1); - } else { - entry->item.blob = lookup_blob(sha1); - } n_refs++; *list_p = entry; list_p = &entry->next; @@ -193,8 +188,16 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) struct tree_entry_list *entry; unsigned i = 0; struct object_refs *refs = alloc_object_refs(n_refs); - for 
(entry = item->entries; entry; entry = entry->next) - refs->ref[i++] = entry->item.any; + for (entry = item->entries; entry; entry = entry->next) { + struct object *obj; + + if (entry->directory) + obj = &lookup_tree(entry->sha1)->object; + else + obj = &lookup_blob(entry->sha1)->object; + refs->ref[i++] = obj; + } + set_object_refs(&item->object, refs); } -- cgit v1.2.3 From 0790a42a502701c7b58e9ad4123e46bf46bbf319 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:17:28 -0700 Subject: Switch "read_tree_recursive()" over to tree-walk functionality Don't use the tree_entry list any more. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index 88c2219030..88f8fd5892 100644 --- a/tree.c +++ b/tree.c @@ -78,19 +78,26 @@ int read_tree_recursive(struct tree *tree, int stage, const char **match, read_tree_fn_t fn) { - struct tree_entry_list *list; + struct tree_desc desc; + if (parse_tree(tree)) return -1; - list = tree->entries; - while (list) { - struct tree_entry_list *current = list; - list = list->next; - if (!match_tree_entry(base, baselen, current->name, - current->mode, match)) + + desc.buf = tree->buffer; + desc.size = tree->size; + + while (desc.size) { + unsigned mode; + const char *name; + const unsigned char *sha1; + + sha1 = tree_entry_extract(&desc, &name, &mode); + update_tree_entry(&desc); + + if (!match_tree_entry(base, baselen, name, mode, match)) continue; - switch (fn(current->sha1, base, baselen, - current->name, current->mode, stage)) { + switch (fn(sha1, base, baselen, name, mode, stage)) { case 0: continue; case READ_TREE_RECURSIVE: @@ -98,16 +105,16 @@ int read_tree_recursive(struct tree *tree, default: return -1; } - if (current->directory) { + if (S_ISDIR(mode)) { int retval; - int pathlen = strlen(current->name); + int pathlen = strlen(name); char *newbase; 
newbase = xmalloc(baselen + 1 + pathlen); memcpy(newbase, base, baselen); - memcpy(newbase + baselen, current->name, pathlen); + memcpy(newbase + baselen, name, pathlen); newbase[baselen + pathlen] = '/'; - retval = read_tree_recursive(lookup_tree(current->sha1), + retval = read_tree_recursive(lookup_tree(sha1), newbase, baselen + pathlen + 1, stage, match, fn); -- cgit v1.2.3 From 2d9c58c69d1bab601e67b036d0546e85abcee7eb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:18:33 -0700 Subject: Remove "tree->entries" tree-entry list from tree parser Instead, just use the tree buffer directly, and use the tree-walk infrastructure to walk the buffers instead of the tree-entry list. The tree-entry list is inefficient, and generates tons of small allocations for no good reason. The tree-walk infrastructure is generally no harder to use than following a linked list, and allows us to do most tree parsing in-place. Some programs still use the old tree-entry lists, and are a bit painful to convert without major surgery. For them we have a helper function that creates a temporary tree-entry list on demand. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 24 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index 88f8fd5892..db6e59f20e 100644 --- a/tree.c +++ b/tree.c @@ -151,22 +151,65 @@ struct tree *lookup_tree(const unsigned char *sha1) return (struct tree *) obj; } -int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) +static int track_tree_refs(struct tree *item) { + int n_refs = 0, i; + struct object_refs *refs; struct tree_desc desc; - struct tree_entry_list **list_p; - int n_refs = 0; + /* Count how many entries there are.. 
*/ + desc.buf = item->buffer; + desc.size = item->size; + while (desc.size) { + n_refs++; + update_tree_entry(&desc); + } + + /* Allocate object refs and walk it again.. */ + i = 0; + refs = alloc_object_refs(n_refs); + desc.buf = item->buffer; + desc.size = item->size; + while (desc.size) { + unsigned mode; + const char *name; + const unsigned char *sha1; + struct object *obj; + + sha1 = tree_entry_extract(&desc, &name, &mode); + update_tree_entry(&desc); + if (S_ISDIR(mode)) + obj = &lookup_tree(sha1)->object; + else + obj = &lookup_blob(sha1)->object; + refs->ref[i++] = obj; + } + set_object_refs(&item->object, refs); + return 0; +} + +int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) +{ if (item->object.parsed) return 0; item->object.parsed = 1; item->buffer = buffer; item->size = size; - desc.buf = buffer; - desc.size = size; + if (track_object_refs) + track_tree_refs(item); + return 0; +} + +struct tree_entry_list *create_tree_entry_list(struct tree *tree) +{ + struct tree_desc desc; + struct tree_entry_list *ret = NULL; + struct tree_entry_list **list_p = &ret; + + desc.buf = tree->buffer; + desc.size = tree->size; - list_p = &item->entries; while (desc.size) { unsigned mode; const char *path; @@ -186,29 +229,19 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) entry->next = NULL; update_tree_entry(&desc); - n_refs++; *list_p = entry; list_p = &entry->next; } + return ret; +} - if (track_object_refs) { - struct tree_entry_list *entry; - unsigned i = 0; - struct object_refs *refs = alloc_object_refs(n_refs); - for (entry = item->entries; entry; entry = entry->next) { - struct object *obj; - - if (entry->directory) - obj = &lookup_tree(entry->sha1)->object; - else - obj = &lookup_blob(entry->sha1)->object; - refs->ref[i++] = obj; - } - - set_object_refs(&item->object, refs); +void free_tree_entry_list(struct tree_entry_list *list) +{ + while (list) { + struct tree_entry_list *next = list->next; + free(list); 
+ list = next; } - - return 0; } int parse_tree(struct tree *item) -- cgit v1.2.3 From 3bc1eca91e5230739cfb488e63fae35a166a07de Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:19:37 -0700 Subject: Remove unused "zeropad" entry from tree_list_entry That was a hack, only needed because 'git fsck-objects' didn't look at the raw tree format. Now that fsck traverses the tree itself, we can drop it. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index db6e59f20e..47318ef890 100644 --- a/tree.c +++ b/tree.c @@ -217,6 +217,7 @@ struct tree_entry_list *create_tree_entry_list(struct tree *tree) struct tree_entry_list *entry; sha1 = tree_entry_extract(&desc, &path, &mode); + update_tree_entry(&desc); entry = xmalloc(sizeof(struct tree_entry_list)); entry->name = path; @@ -225,10 +226,8 @@ struct tree_entry_list *create_tree_entry_list(struct tree *tree) entry->directory = S_ISDIR(mode) != 0; entry->executable = (mode & S_IXUSR) != 0; entry->symlink = S_ISLNK(mode) != 0; - entry->zeropad = *(const char *)(desc.buf) == '0'; entry->next = NULL; - update_tree_entry(&desc); *list_p = entry; list_p = &entry->next; } -- cgit v1.2.3 From 15b5536ee47c6684806edd7725adbbdede9fb95c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2006 12:21:28 -0700 Subject: Remove last vestiges of generic tree_entry_list The old tree_entry_list is dead, long live the unified single tree parser. Yes, we now still have a compatibility function to create a bogus tree_entry_list in builtin-read-tree.c, but that is now entirely local to that very messy piece of code. I'd love to clean read-tree.c up too, but I'm too scared right now, so the best I can do is to just contain the damage, and try to make sure that no new users of the tree_entry_list sprout up by not having it as an exported interface any more. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index 47318ef890..fb18724259 100644 --- a/tree.c +++ b/tree.c @@ -201,48 +201,6 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) return 0; } -struct tree_entry_list *create_tree_entry_list(struct tree *tree) -{ - struct tree_desc desc; - struct tree_entry_list *ret = NULL; - struct tree_entry_list **list_p = &ret; - - desc.buf = tree->buffer; - desc.size = tree->size; - - while (desc.size) { - unsigned mode; - const char *path; - const unsigned char *sha1; - struct tree_entry_list *entry; - - sha1 = tree_entry_extract(&desc, &path, &mode); - update_tree_entry(&desc); - - entry = xmalloc(sizeof(struct tree_entry_list)); - entry->name = path; - entry->sha1 = sha1; - entry->mode = mode; - entry->directory = S_ISDIR(mode) != 0; - entry->executable = (mode & S_IXUSR) != 0; - entry->symlink = S_ISLNK(mode) != 0; - entry->next = NULL; - - *list_p = entry; - list_p = &entry->next; - } - return ret; -} - -void free_tree_entry_list(struct tree_entry_list *list) -{ - while (list) { - struct tree_entry_list *next = list->next; - free(list); - list = next; - } -} - int parse_tree(struct tree *item) { char type[20]; -- cgit v1.2.3 From 4c068a983150b740c3fcf6a33f342ac923abd3f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 May 2006 09:45:45 -0700 Subject: tree_entry(): new tree-walking helper function This adds a "tree_entry()" function that combines the common operation of doing a "tree_entry_extract()" + "update_tree_entry()". It also has a simplified calling convention, designed for simple loops that traverse over a whole tree: the arguments are pointers to the tree descriptor and a name_entry structure to fill in, and it returns a boolean "true" if there was an entry left to be gotten in the tree. 
This allows tree traversal with struct tree_desc desc; struct name_entry entry; desc.buf = tree->buffer; desc.size = tree->size; while (tree_entry(&desc, &entry)) { ... use "entry.{path, sha1, mode, pathlen}" ... } which is not only shorter than writing it out in full, it's hopefully less error prone too. [ It's actually a tad faster too - we don't need to recalculate the entry pathlength in both extract and update, but need to do it only once. Also, some callers can avoid doing a "strlen()" on the result, since it's returned as part of the name_entry structure. However, by now we're talking just 1% speedup on "git-rev-list --objects --all", and we're definitely at the point where tree walking is no longer the issue any more. ] NOTE! Not everybody wants to use this new helper function, since some of the tree walkers very much on purpose do the descriptor update separately from the entry extraction. So the "extract + update" sequence still remains as the core sequence, this is just a simplified interface. We should probably add a silly two-line inline helper function for initializing the descriptor from the "struct tree" too, just to cut down on the noise from that common "desc" initializer. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) (limited to 'tree.c') diff --git a/tree.c b/tree.c index fb18724259..9bbe2da37b 100644 --- a/tree.c +++ b/tree.c @@ -79,6 +79,7 @@ int read_tree_recursive(struct tree *tree, read_tree_fn_t fn) { struct tree_desc desc; + struct name_entry entry; if (parse_tree(tree)) return -1; @@ -86,18 +87,11 @@ int read_tree_recursive(struct tree *tree, desc.buf = tree->buffer; desc.size = tree->size; - while (desc.size) { - unsigned mode; - const char *name; - const unsigned char *sha1; - - sha1 = tree_entry_extract(&desc, &name, &mode); - update_tree_entry(&desc); - - if (!match_tree_entry(base, baselen, name, mode, match)) + while (tree_entry(&desc, &entry)) { + if (!match_tree_entry(base, baselen, entry.path, entry.mode, match)) continue; - switch (fn(sha1, base, baselen, name, mode, stage)) { + switch (fn(entry.sha1, base, baselen, entry.path, entry.mode, stage)) { case 0: continue; case READ_TREE_RECURSIVE: @@ -105,18 +99,17 @@ int read_tree_recursive(struct tree *tree, default: return -1; } - if (S_ISDIR(mode)) { + if (S_ISDIR(entry.mode)) { int retval; - int pathlen = strlen(name); char *newbase; - newbase = xmalloc(baselen + 1 + pathlen); + newbase = xmalloc(baselen + 1 + entry.pathlen); memcpy(newbase, base, baselen); - memcpy(newbase + baselen, name, pathlen); - newbase[baselen + pathlen] = '/'; - retval = read_tree_recursive(lookup_tree(sha1), + memcpy(newbase + baselen, entry.path, entry.pathlen); + newbase[baselen + entry.pathlen] = '/'; + retval = read_tree_recursive(lookup_tree(entry.sha1), newbase, - baselen + pathlen + 1, + baselen + entry.pathlen + 1, stage, match, fn); free(newbase); if (retval) @@ -156,6 +149,7 @@ static int track_tree_refs(struct tree *item) int n_refs = 0, i; struct object_refs *refs; struct tree_desc desc; + struct name_entry entry; /* Count how many entries there are.. 
*/ desc.buf = item->buffer; @@ -170,18 +164,13 @@ static int track_tree_refs(struct tree *item) refs = alloc_object_refs(n_refs); desc.buf = item->buffer; desc.size = item->size; - while (desc.size) { - unsigned mode; - const char *name; - const unsigned char *sha1; + while (tree_entry(&desc, &entry)) { struct object *obj; - sha1 = tree_entry_extract(&desc, &name, &mode); - update_tree_entry(&desc); - if (S_ISDIR(mode)) - obj = &lookup_tree(sha1)->object; + if (S_ISDIR(entry.mode)) + obj = &lookup_tree(entry.sha1)->object; else - obj = &lookup_blob(sha1)->object; + obj = &lookup_blob(entry.sha1)->object; refs->ref[i++] = obj; } set_object_refs(&item->object, refs); -- cgit v1.2.3