From f05951fe3ffe461eb9cb715bf7cd08535657f29c Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sun, 9 Dec 2007 15:43:34 +0100 Subject: Add compat/regex.[ch] and compat/fnmatch.[ch]. We don't have fnmatch and regular expressions on Windows. We borrow fnmatch.[ch] from the GNU C library (license is LGPL 2 or later) and GNU regexp (regexp.c[ch], license is GPL 2 or later). Note that regexp.c was changed slightly to avoid warnings with gcc. We make the addition of these files an extra commit so as not to clutter the next commits. Signed-off-by: Johannes Sixt --- compat/fnmatch.c | 488 ++++++ compat/fnmatch.h | 84 + compat/regex.c | 4927 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ compat/regex.h | 490 ++++++ 4 files changed, 5989 insertions(+) create mode 100644 compat/fnmatch.c create mode 100644 compat/fnmatch.h create mode 100644 compat/regex.c create mode 100644 compat/regex.h diff --git a/compat/fnmatch.c b/compat/fnmatch.c new file mode 100644 index 0000000000..1f4ead5f98 --- /dev/null +++ b/compat/fnmatch.c @@ -0,0 +1,488 @@ +/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#if HAVE_CONFIG_H +# include +#endif + +/* Enable GNU extensions in fnmatch.h. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +#include +#include +#include + +#if HAVE_STRING_H || defined _LIBC +# include +#else +# include +#endif + +#if defined STDC_HEADERS || defined _LIBC +# include +#endif + +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* Solaris 2.5 has a bug: must be included before . */ +# include +# include +#endif + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined _LIBC || !defined __GNU_LIBRARY__ + + +# if defined STDC_HEADERS || !defined isascii +# define ISASCII(c) 1 +# else +# define ISASCII(c) isascii(c) +# endif + +# ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# endif +# ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# endif + +# define ISPRINT(c) (ISASCII (c) && isprint (c)) +# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +# define ISALNUM(c) (ISASCII (c) && isalnum (c)) +# define ISALPHA(c) (ISASCII (c) && isalpha (c)) +# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +# define ISLOWER(c) (ISASCII (c) && islower (c)) +# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +# define ISSPACE(c) (ISASCII (c) && isspace (c)) +# define ISUPPER(c) (ISASCII (c) && isupper (c)) +# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# ifdef _LIBC +# define IS_CHAR_CLASS(string) __wctype (string) +# else +# define IS_CHAR_CLASS(string) wctype (string) +# endif +# else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +# define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) +# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +# if !defined _LIBC && !defined getenv +extern char *getenv (); +# endif + +# ifndef errno +extern int errno; +# endif + +/* This function doesn't exist on most systems. */ + +# if !defined HAVE___STRCHRNUL && !defined _LIBC +static char * +__strchrnul (s, c) + const char *s; + int c; +{ + char *result = strchr (s, c); + if (result == NULL) + result = strchr (s, '\0'); + return result; +} +# endif + +# ifndef internal_function +/* Inside GNU libc we mark some function in a special way. In other + environments simply ignore the marking. */ +# define internal_function +# endif + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, nonzero if not. */ +static int internal_fnmatch __P ((const char *pattern, const char *string, + int no_leading_period, int flags)) + internal_function; +static int +internal_function +internal_fnmatch (pattern, string, no_leading_period, flags) + const char *pattern; + const char *string; + int no_leading_period; + int flags; +{ + register const char *p = pattern, *n = string; + register unsigned char c; + +/* Note that this evaluates C many times. */ +# ifdef _LIBC +# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) +# else +# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) +# endif + + while ((c = *p++) != '\0') + { + c = FOLD (c); + + switch (c) + { + case '?': + if (*n == '\0') + return FNM_NOMATCH; + else if (*n == '/' && (flags & FNM_FILE_NAME)) + return FNM_NOMATCH; + else if (*n == '.' && no_leading_period + && (n == string + || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) + return FNM_NOMATCH; + break; + + case '\\': + if (!(flags & FNM_NOESCAPE)) + { + c = *p++; + if (c == '\0') + /* Trailing \ loses. */ + return FNM_NOMATCH; + c = FOLD (c); + } + if (FOLD ((unsigned char) *n) != c) + return FNM_NOMATCH; + break; + + case '*': + if (*n == '.' && no_leading_period + && (n == string + || (n[-1] == '/' && (flags & FNM_FILE_NAME)))) + return FNM_NOMATCH; + + for (c = *p++; c == '?' || c == '*'; c = *p++) + { + if (*n == '/' && (flags & FNM_FILE_NAME)) + /* A slash does not match a wildcard under FNM_FILE_NAME. */ + return FNM_NOMATCH; + else if (c == '?') + { + /* A ? needs to match one character. */ + if (*n == '\0') + /* There isn't another character; no match. */ + return FNM_NOMATCH; + else + /* One character of the string is consumed in matching + this ? wildcard, so *??? won't match if there are + less than three characters. */ + ++n; + } + } + + if (c == '\0') + /* The wildcard(s) is/are the last element of the pattern. + If the name is a file name and contains another slash + this does mean it cannot match. */ + return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL + ? FNM_NOMATCH : 0); + else + { + const char *endp; + + endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0'); + + if (c == '[') + { + int flags2 = ((flags & FNM_FILE_NAME) + ? flags : (flags & ~FNM_PERIOD)); + + for (--p; n < endp; ++n) + if (internal_fnmatch (p, n, + (no_leading_period + && (n == string + || (n[-1] == '/' + && (flags + & FNM_FILE_NAME)))), + flags2) + == 0) + return 0; + } + else if (c == '/' && (flags & FNM_FILE_NAME)) + { + while (*n != '\0' && *n != '/') + ++n; + if (*n == '/' + && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD, + flags) == 0)) + return 0; + } + else + { + int flags2 = ((flags & FNM_FILE_NAME) + ? flags : (flags & ~FNM_PERIOD)); + + if (c == '\\' && !(flags & FNM_NOESCAPE)) + c = *p; + c = FOLD (c); + for (--p; n < endp; ++n) + if (FOLD ((unsigned char) *n) == c + && (internal_fnmatch (p, n, + (no_leading_period + && (n == string + || (n[-1] == '/' + && (flags + & FNM_FILE_NAME)))), + flags2) == 0)) + return 0; + } + } + + /* If we come here no match is possible with the wildcard. */ + return FNM_NOMATCH; + + case '[': + { + /* Nonzero if the sense of the character class is inverted. */ + static int posixly_correct; + register int not; + char cold; + + if (posixly_correct == 0) + posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; + + if (*n == '\0') + return FNM_NOMATCH; + + if (*n == '.' && no_leading_period && (n == string + || (n[-1] == '/' + && (flags + & FNM_FILE_NAME)))) + return FNM_NOMATCH; + + if (*n == '/' && (flags & FNM_FILE_NAME)) + /* `/' cannot be matched. */ + return FNM_NOMATCH; + + not = (*p == '!' || (posixly_correct < 0 && *p == '^')); + if (not) + ++p; + + c = *p++; + for (;;) + { + unsigned char fn = FOLD ((unsigned char) *n); + + if (!(flags & FNM_NOESCAPE) && c == '\\') + { + if (*p == '\0') + return FNM_NOMATCH; + c = FOLD ((unsigned char) *p); + ++p; + + if (c == fn) + goto matched; + } + else if (c == '[' && *p == ':') + { + /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + size_t c1 = 0; +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + wctype_t wt; +# endif + const char *startp = p; + + for (;;) + { + if (c1 == CHAR_CLASS_MAX_LENGTH) + /* The name is too long and therefore the pattern + is ill-formed. */ + return FNM_NOMATCH; + + c = *++p; + if (c == ':' && p[1] == ']') + { + p += 2; + break; + } + if (c < 'a' || c >= 'z') + { + /* This cannot possibly be a character class name. + Match it as a normal range. */ + p = startp; + c = '['; + goto normal_bracket; + } + str[c1++] = c; + } + str[c1] = '\0'; + +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + wt = IS_CHAR_CLASS (str); + if (wt == 0) + /* Invalid character class name. */ + return FNM_NOMATCH; + + if (__iswctype (__btowc ((unsigned char) *n), wt)) + goto matched; +# else + if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n)) + || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n)) + || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n)) + || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n)) + || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n)) + || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n)) + || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n)) + || (STREQ (str, "print") && ISPRINT ((unsigned char) *n)) + || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n)) + || (STREQ (str, "space") && ISSPACE ((unsigned char) *n)) + || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n)) + || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n))) + goto matched; +# endif + } + else if (c == '\0') + /* [ (unterminated) loses. */ + return FNM_NOMATCH; + else + { + normal_bracket: + if (FOLD (c) == fn) + goto matched; + + cold = c; + c = *p++; + + if (c == '-' && *p != ']') + { + /* It is a range. */ + unsigned char cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == '\\') + cend = *p++; + if (cend == '\0') + return FNM_NOMATCH; + + if (cold <= fn && fn <= FOLD (cend)) + goto matched; + + c = *p++; + } + } + + if (c == ']') + break; + } + + if (!not) + return FNM_NOMATCH; + break; + + matched: + /* Skip the rest of the [...] that already matched. */ + while (c != ']') + { + if (c == '\0') + /* [... (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + if (!(flags & FNM_NOESCAPE) && c == '\\') + { + if (*p == '\0') + return FNM_NOMATCH; + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + else if (c == '[' && *p == ':') + { + do + if (*++p == '\0') + return FNM_NOMATCH; + while (*p != ':' || p[1] == ']'); + p += 2; + c = *p; + } + } + if (not) + return FNM_NOMATCH; + } + break; + + default: + if (c != FOLD ((unsigned char) *n)) + return FNM_NOMATCH; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if ((flags & FNM_LEADING_DIR) && *n == '/') + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return FNM_NOMATCH; + +# undef FOLD +} + + +int +fnmatch (pattern, string, flags) + const char *pattern; + const char *string; + int flags; +{ + return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ diff --git a/compat/fnmatch.h b/compat/fnmatch.h new file mode 100644 index 0000000000..cc3ec37940 --- /dev/null +++ b/compat/fnmatch.h @@ -0,0 +1,84 @@ +/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef _FNMATCH_H +#define _FNMATCH_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32 +# if !defined __GLIBC__ || !defined __P +# undef __P +# define __P(protos) protos +# endif +#else /* Not C++ or ANSI C. */ +# undef __P +# define __P(protos) () +/* We can get away without defining `const' here only because in this file + it is used only inside the prototype for `fnmatch', which is elided in + non-ANSI C where `const' is problematical. */ +#endif /* C++ or ANSI C. */ + +#ifndef const +# if (defined __STDC__ && __STDC__) || defined __cplusplus +# define __const const +# else +# define __const +# endif +#endif + +/* We #undef these before defining them because some losing systems + (HP-UX A.08.07 for example) define these in . */ +#undef FNM_PATHNAME +#undef FNM_NOESCAPE +#undef FNM_PERIOD + +/* Bits set in the FLAGS argument to `fnmatch'. */ +#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ +#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ +#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ + +#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE +# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */ +# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ +# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ +#endif + +/* Value returned by `fnmatch' if STRING does not match PATTERN. */ +#define FNM_NOMATCH 1 + +/* This value is returned if the implementation does not support + `fnmatch'. Since this is not the case here it will never be + returned but the conformance test suites still require the symbol + to be defined. */ +#ifdef _XOPEN_SOURCE +# define FNM_NOSYS (-1) +#endif + +/* Match NAME against the filename pattern PATTERN, + returning zero if it matches, FNM_NOMATCH if not. */ +extern int fnmatch __P ((__const char *__pattern, __const char *__name, + int __flags)); + +#ifdef __cplusplus +} +#endif + +#endif /* fnmatch.h */ diff --git a/compat/regex.c b/compat/regex.c new file mode 100644 index 0000000000..87b33e4669 --- /dev/null +++ b/compat/regex.c @@ -0,0 +1,4927 @@ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P10003.2/D11.2, except for + internationalization features.) + + Copyright (C) 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#define _GNU_SOURCE + +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include + +/* We used to test for `BSTRING' here, but only GCC and Emacs define + `BSTRING', as far as I know, and neither of them use this code. */ +#include +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif + +#include + + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set. */ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + + +/* Get the interface, including the syntax bits. */ +#include "regex.h" + +/* isalpha etc. are used for the character classes. */ +#include + +#ifndef isascii +#define isascii(c) 1 +#endif + +#ifdef isblank +#define ISBLANK(c) (isascii (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (isascii (c) && isgraph (c)) +#else +#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (isascii (c) && isprint (c)) +#define ISDIGIT(c) (isascii (c) && isdigit (c)) +#define ISALNUM(c) (isascii (c) && isalnum (c)) +#define ISALPHA(c) (isascii (c) && isalpha (c)) +#define ISCNTRL(c) (isascii (c) && iscntrl (c)) +#define ISLOWER(c) (isascii (c) && islower (c)) +#define ISPUNCT(c) (isascii (c) && ispunct (c)) +#define ISSPACE(c) (isascii (c) && isspace (c)) +#define ISUPPER(c) (isascii (c) && isupper (c)) +#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) + +#ifndef NULL +#define NULL 0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +#endif /* not REGEX_MALLOC */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. */ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8 /* In bits. */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. + + The value of `exactn' is needed in search.c (search_buffer) in Emacs. + So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of + `exactn' we use here must also be 1. */ + +typedef enum +{ + no_op = 0, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn = 1, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +#ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +#endif /* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ + +#define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. */ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include + +/* It is useful to test things that ``must'' be true when debugging. */ +#include + +static int debug = 0; + +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +extern void printchar (); + +/* Print the fastmap in human-readable form. */ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + printchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + printchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + printchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c; + + printf ("/charset%s", + (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); + + assert (p + *p < pend); + + for (c = 0; c < *p; c++) + { + unsigned bit; + unsigned char map_byte = p[1 + c]; + + putchar ('/'); + + for (bit = 0; bit < BYTEWIDTH; bit++) + if (map_byte & (1 << bit)) + printchar (c * BYTEWIDTH + bit); + } + p += 1 + *p; + break; + } + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump/0/%d", mcnt); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump/0/%d", mcnt); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump/0/%d", mcnt); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump/0/%d", mcnt); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump/0/%d", mcnt); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt/0/%d", mcnt); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump/0/%d", mcnt); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + +#ifdef emacs + case before_dot: + printf ("/before_dot"); + break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + } + printf ("/\n"); +} + + +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + printf ("syntax: %d\n", bufp->syntax); + /* Perhaps we should print the translate table? */ +} + + +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + unsigned this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + printchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + printchar (string2[this_char]); + } +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. */ + +static const char *re_error_msg[] = + { NULL, /* REG_NOERROR */ + "No match", /* REG_NOMATCH */ + "Invalid regular expression", /* REG_BADPAT */ + "Invalid collation character", /* REG_ECOLLATE */ + "Invalid character class name", /* REG_ECTYPE */ + "Trailing backslash", /* REG_EESCAPE */ + "Invalid back reference", /* REG_ESUBREG */ + "Unmatched [ or [^", /* REG_EBRACK */ + "Unmatched ( or \\(", /* REG_EPAREN */ + "Unmatched \\{", /* REG_EBRACE */ + "Invalid content of \\{\\}", /* REG_BADBR */ + "Invalid range end", /* REG_ERANGE */ + "Memory exhausted", /* REG_ESPACE */ + "Invalid preceding regular expression", /* REG_BADRPT */ + "Premature end of regular expression", /* REG_EEND */ + "Regular expression too big", /* REG_ESIZE */ + "Unmatched ) or \\)", /* REG_ERPAREN */ + }; + +/* Subroutine declarations and macros for regex_compile. */ + +static void store_op1 (), store_op2 (); +static void insert_op1 (), insert_op2 (); +static boolean at_begline_loc_p (), at_endline_loc_p (); +static boolean group_in_compile_stack (); +static reg_errcode_t compile_range (); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = translate[c]; \ + } while (0) + +/* Fetch the next character in the uncompiled pattern, with no + translation. */ +#define PATFETCH_RAW(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + } while (0) + +/* Go backwards one character in the pattern. */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D. We + cast the subscript to translate because some data is declared as + `char *', to avoid warnings when a string constant is passed. But + when we use a character as a subscript we must make it unsigned. */ +#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) + + +/* Macros for outputting the compiled pattern into `buffer'. */ + +/* If the buffer isn't allocated when it comes in, use this. */ +#define INIT_BUF_SIZE 32 + +/* Make sure we have at least N more bytes of space in buffer. */ +#define GET_BUFFER_SPACE(n) \ + while (b - bufp->buffer + (n) > bufp->allocated) \ + EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it. */ +#define BUF_PUSH(c) \ + do { \ + GET_BUFFER_SPACE (1); \ + *b++ = (unsigned char) (c); \ + } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ +#define BUF_PUSH_2(c1, c2) \ + do { \ + GET_BUFFER_SPACE (2); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + } while (0) + + +/* As with BUF_PUSH_2, except for three bytes. */ +#define BUF_PUSH_3(c1, c2, c3) \ + do { \ + GET_BUFFER_SPACE (3); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + *b++ = (unsigned char) (c3); \ + } while (0) + + +/* Store a jump with opcode OP at LOC to location TO. We store a + relative address offset by the three bytes the jump itself occupies. */ +#define STORE_JUMP(op, loc, to) \ + store_op1 (op, loc, (to) - (loc) - 3) + +/* Likewise, for a two-argument jump. */ +#define STORE_JUMP2(op, loc, to, arg) \ + store_op2 (op, loc, (to) - (loc) - 3, arg) + +/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP(op, loc, to) \ + insert_op1 (op, loc, (to) - (loc) - 3, b) + +/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP2(op, loc, to, arg) \ + insert_op2 (op, loc, (to) - (loc) - 3, arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets + into the pattern are two bytes long. So if 2^16 bytes turns out to + be too small, many things would have to change. */ +#define MAX_BUF_SIZE (1L << 16) + + +/* Extend the buffer by twice its current size via realloc and + reset the pointers that pointed into the old block to point to the + correct places in the new one. If extending the buffer results in it + being larger than MAX_BUF_SIZE, then flag memory exhausted. */ +#define EXTEND_BUFFER() \ + do { \ + unsigned char *old_buffer = bufp->buffer; \ + if (bufp->allocated == MAX_BUF_SIZE) \ + return REG_ESIZE; \ + bufp->allocated <<= 1; \ + if (bufp->allocated > MAX_BUF_SIZE) \ + bufp->allocated = MAX_BUF_SIZE; \ + bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ + if (bufp->buffer == NULL) \ + return REG_ESPACE; \ + /* If the buffer moved, move all the pointers into it. */ \ + if (old_buffer != bufp->buffer) \ + { \ + b = (b - old_buffer) + bufp->buffer; \ + begalt = (begalt - old_buffer) + bufp->buffer; \ + if (fixup_alt_jump) \ + fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ + if (laststart) \ + laststart = (laststart - old_buffer) + bufp->buffer; \ + if (pending_exact) \ + pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ + } \ + } while (0) + + +/* Since we have one byte reserved for the register number argument to + {start,stop}_memory, the maximum number of groups we can report + things about is what fits in that byte. */ +#define MAX_REGNUM 255 + +/* But patterns can have more than `MAX_REGNUM' registers. We just + ignore the excess. */ +typedef unsigned regnum_t; + + +/* Macros for the compile stack. */ + +/* Since offsets can go either forwards or backwards, this type needs to + be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ +typedef int pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +#define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. + + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + int size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. */ + register unsigned char c, c1; + + /* A random tempory spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + char *translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. */ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. */ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + printchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. */ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. */ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) return REG_ESPACE; + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. */ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. */ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. */ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) return REG_EBRACK; + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) return REG_EBRACK; + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. */ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + return REG_ERANGE; + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + /* See if we're at the beginning of a possible character + class. */ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) return REG_EBRACK; + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). */ + if (c == ':' && *p == ']') + { + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) return REG_EBRACK; + + for (ch = 0; ch < 1 << BYTEWIDTH; ch++) + { + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch)) + || (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch)) + || (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (ch); + } + had_char_class = true; + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. */ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) return REG_EESCAPE; + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. */ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. */ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) + { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + return REG_ERPAREN; + } + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) + { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + return REG_ERPAREN; + } + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. */ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. */ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_EBRACE; + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. */ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') return REG_EBRACE; + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. When + we're all done, the pattern will look like: + set_number_at + set_number_at + succeed_n + + jump_n + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. */ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. */ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. */ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. --karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + BUF_PUSH (wordbeg); + break; + + case '>': + BUF_PUSH (wordend); + break; + + case 'b': + BUF_PUSH (wordbound); + break; + + case 'B': + BUF_PUSH (notwordbound); + break; + + case '`': + BUF_PUSH (begbuf); + break; + + case '\'': + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + return REG_ESUBREG; + + /* Can't back reference to a subexpression if inside of it. */ + if (group_in_compile_stack (compile_stack, c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. */ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. */ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + return REG_EPAREN; + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. */ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: "); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. */ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + int syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : NULL; + + return + /* Before a subexpression? */ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. */ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + char *translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + range_start = ((unsigned char *) p)[-2]; + range_end = ((unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE. */ + + +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. */ +#ifndef INIT_FAILURE_ALLOC +#define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack. Would be + exactly that if always used MAX_FAILURE_SPACE each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ +int re_max_failures = 2000; + +typedef const unsigned char *fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) +#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) + + +/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ + +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ + } while (0) + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. + + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + + REGEX_REALLOCATE requires `destination' be declared. */ + +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ? 0 \ + : ((fail_stack).size <<= 1, \ + 1))) + + +/* Push PATTERN_OP on FAIL_STACK. + + Return 1 if was able to do so and 0 if ran out of memory allocating + space to do so. */ +#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (fail_stack)) \ + ? 0 \ + : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ + 1)) + +/* This pushes an item onto the failure stack. Must be a four-byte + value. Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ITEM(item) \ + fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item + +/* The complement operation. Assumes `fail_stack' is nonempty. */ +#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_ITEM +#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared. + + Does `return FAILURE_CODE' if runs out of memory. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. */ \ + int this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. */ \ + DEBUG_PRINT1 ("\n"); \ + \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_ITEM (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_ITEM (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_ITEM (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_ITEM (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_ITEM (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_ITEM (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. */ +#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the highest and lowest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + int this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string. */ \ + string_temp = POP_FAILURE_ITEM (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; + fail_stack_type fail_stack; +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. */ + unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned long size = bufp->used; + const unsigned char *p = pattern; + register unsigned char *pend = pattern + size; + + /* Assume that each path through the pattern can be null until + proven otherwise. We set this false at the bottom of switch + statement, to which we get only if a particular path doesn't + match the empty string. */ + boolean path_can_be_null = true; + + /* We aren't doing a `succeed_n' to begin with. */ + boolean succeed_n_p = false; + + assert (fastmap != NULL && p != NULL); + + INIT_FAIL_STACK (); + bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + bufp->fastmap_accurate = 1; /* It will be when we're done. */ + bufp->can_be_null = 0; + + while (p != pend || !FAIL_STACK_EMPTY ()) + { + if (p == pend) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail]; + } + + /* We should never be about to go beyond the end of the pattern. */ + assert (p < pend); + +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + + /* I guess the idea here is to simply not bother with a fastmap + if a backreference is used, since it's too hard to figure out + the fastmap for the corresponding group. Setting + `can_be_null' stops `re_search_2' from using the fastmap, so + that is all we do. */ + case duplicate: + bufp->can_be_null = 1; + return 0; + + + /* Following are the cases which match a character. These end + with `break'. */ + + case exactn: + fastmap[p[1]] = 1; + break; + + + case charset: + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) + fastmap[j] = 1; + break; + + + case charset_not: + /* Chars beyond end of map must be allowed. */ + for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) + fastmap[j] = 1; + break; + + + case wordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == Sword) + fastmap[j] = 1; + break; + + + case notwordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != Sword) + fastmap[j] = 1; + break; + + + case anychar: + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = 0; + + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + return 0; + + /* Otherwise, have to check alternative paths. */ + break; + + +#ifdef emacs + case syntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + case notsyntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + /* All cases after this match the empty string. These end with + `continue'. */ + + + case before_dot: + case at_dot: + case after_dot: + continue; +#endif /* not emacs */ + + + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case push_dummy_failure: + continue; + + + case jump_n: + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case jump_past_alt: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + if (j > 0) + continue; + + /* Jump backward implies we just went through the body of a + loop and matched nothing. Opcode jumped to should be + `on_failure_jump' or `succeed_n'. Just treat it like an + ordinary jump. For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if ((re_opcode_t) *p != on_failure_jump + && (re_opcode_t) *p != succeed_n) + continue; + + p++; + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + + /* If what's on the stack is where we are now, pop it. */ + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1] == p) + fail_stack.avail--; + + continue; + + + case on_failure_jump: + case on_failure_keep_string_jump: + handle_on_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + + /* For some patterns, e.g., `(a?)?', `p+j' here points to the + end of the pattern. We don't want to push such a point, + since when we restore it above, entering the switch will + increment `p' past the end of the pattern. We don't need + to push such a point since we obviously won't find any more + fastmap entries beyond `pend'. Such a pattern can match + the null string, though. */ + if (p + j < pend) + { + if (!PUSH_PATTERN_OP (p + j, fail_stack)) + return -2; + } + else + bufp->can_be_null = 1; + + if (succeed_n_p) + { + EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ + succeed_n_p = false; + } + + continue; + + + case succeed_n: + /* Get to the number of times to succeed. */ + p += 2; + + /* Increment p past the n for when k != 0. */ + EXTRACT_NUMBER_AND_INCR (k, p); + if (k == 0) + { + p -= 4; + succeed_n_p = true; /* Spaghetti code alert. */ + goto handle_on_failure_jump; + } + continue; + + + case set_number_at: + p += 4; + continue; + + + case start_memory: + case stop_memory: + p += 2; + continue; + + + default: + abort (); /* We have listed all the cases. */ + } /* switch *p++ */ + + /* Getting here means we have found the possible starting + characters for one path of the pattern -- and that the empty + string does not match. We need not follow this path further. + Instead, look at the next alternative (remembered on the + stack), or quit if no more. The test at the top of the loop + does these things. */ + path_can_be_null = false; + p = pend; + } /* while p */ + + /* Set `can_be_null' for the last path (also the first path, if the + pattern is empty). */ + bufp->can_be_null |= path_can_be_null; + return 0; +} /* re_compile_fastmap */ + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t) 0; + } +} + +/* Searching routines. */ + +/* Like re_search_2, below, but only one string is specified, and + doesn't let you say where to stop matching. */ + +int +re_search (bufp, string, size, startpos, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, startpos, range; + struct re_registers *regs; +{ + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + regs, size); +} + + +/* Using the compiled pattern in BUFP->buffer, first tries to match the + virtual concatenation of STRING1 and STRING2, starting first at index + STARTPOS, then at STARTPOS + 1, and so on. + + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + + RANGE is how far to scan while trying to match. RANGE = 0 means try + only at STARTPOS; in general, the last start tried is STARTPOS + + RANGE. + + In REGS, return the indices of the virtual concatenation of STRING1 + and STRING2 that matched the entire BUFP->buffer and its contained + subexpressions. + + Do not consider matching one past the index STOP in the virtual + concatenation of STRING1 and STRING2. + + We return either the position in the strings at which the match was + found, -1 if no match, or -2 if error (such as failure + stack overflow). */ + +int +re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int startpos; + int range; + struct re_registers *regs; + int stop; +{ + int val; + register char *fastmap = bufp->fastmap; + register char *translate = bufp->translate; + int total_size = size1 + size2; + int endpos = startpos + range; + + /* Check for out-of-range STARTPOS. */ + if (startpos < 0 || startpos > total_size) + return -1; + + /* Fix up RANGE if it might eventually take us outside + the virtual concatenation of STRING1 and STRING2. */ + if (endpos < -1) + range = -1 - startpos; + else if (endpos > total_size) + range = total_size - startpos; + + /* If the search isn't to be a backwards one, don't waste time in a + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + { + if (startpos > 0) + return -1; + else + range = 1; + } + + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. */ + for (;;) + { + /* If a fastmap is supplied, skip quickly over characters that + cannot be the start of a match. If the pattern can match the + null string, however, we don't need to skip characters; we want + the first null string. */ + if (fastmap && startpos < total_size && !bufp->can_be_null) + { + if (range > 0) /* Searching forwards. */ + { + register const char *d; + register int lim = 0; + int irange = range; + + if (startpos < size1 && startpos + range >= size1) + lim = range - (size1 - startpos); + + d = (startpos >= size1 ? string2 - size1 : string1) + startpos; + + /* Written out as an if-else to avoid testing `translate' + inside the loop. */ + if (translate) + while (range > lim + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) + range--; + else + while (range > lim && !fastmap[(unsigned char) *d++]) + range--; + + startpos += irange - range; + } + else /* Searching backwards. */ + { + register char c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); + + if (!fastmap[(unsigned char) TRANSLATE (c)]) + goto advance; + } + } + + /* If can't match the null string, and that's all we have left, fail. */ + if (range >= 0 && startpos == total_size && fastmap + && !bufp->can_be_null) + return -1; + + val = re_match_2 (bufp, string1, size1, string2, size2, + startpos, regs, stop); + if (val >= 0) + return startpos; + + if (val == -2) + return -2; + + advance: + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ + +/* Declarations and macros for re_match_2. */ + +static int bcmp_translate (); +static boolean alt_match_null_string_p (), + common_op_match_null_string_p (), + group_match_null_string_p (); + +/* Structure for per-register (a.k.a. per-group) information. + This must not be longer than one word, because we push this value + onto the failure stack. Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. + + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + unsigned r; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + while (0) + + +/* This converts PTR, a pointer into one of the search strings `string1' + and `string2' into an offset from the beginning of that string. */ +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) + +/* Registers are set to a sentinel when they haven't yet matched. */ +#define REG_UNSET_VALUE ((char *) -1) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + + +/* Macros for dealing with the split strings in re_match_2. */ + +#define MATCHING_IN_FIRST_STRING (dend == end_match_1) + +/* Call before fetching a character with *d. This switches over to + string2 if necessary. */ +#define PREFETCH() \ + while (d == dend) \ + { \ + /* End of string2 => fail. */ \ + if (dend == end_match_2) \ + goto fail; \ + /* End of string1 => advance to string2. */ \ + d = string2; \ + dend = end_match_2; \ + } + + +/* Test if at very beginning or at very end of the virtual concatenation + of `string1' and `string2'. If only one string, it's `string2'. */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) + + +/* Test if D points to a character which is word-constituent. We have + two special cases to check for: if past the end of string1, look at + the first character in string2; and if before the beginning of + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ + (SYNTAX ((d) == end1 ? *string2 \ + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) + +/* Test if the character before D and the one at D differ with respect + to being word-constituent. */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) + + +/* Free everything we malloc. */ +#ifdef REGEX_MALLOC +#define FREE_VAR(var) if (var) free (var); var = NULL +#define FREE_VARIABLES() \ + do { \ + FREE_VAR (fail_stack.stack); \ + FREE_VAR (regstart); \ + FREE_VAR (regend); \ + FREE_VAR (old_regstart); \ + FREE_VAR (old_regend); \ + FREE_VAR (best_regstart); \ + FREE_VAR (best_regend); \ + FREE_VAR (reg_info); \ + FREE_VAR (reg_dummy); \ + FREE_VAR (reg_info_dummy); \ + } while (0) +#else /* not REGEX_MALLOC */ +/* Some MIPS systems (at least) want this to free alloca'd storage. */ +#define FREE_VARIABLES() alloca (0) +#endif /* not REGEX_MALLOC */ + + +/* These values must meet several constraints. They must not be valid + register values; since we have a limit of 255 registers (because + we use only one byte in the pattern for the register number), we can + use numbers larger than 255. They must differ by 1, because of + NUM_FAILURE_ITEMS above. And the value for the lowest register must + be larger than the value for the highest register, so we do not try + to actually save any registers when none are active. */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) + +/* Matching routines. */ + +#ifndef emacs /* Emacs never uses this. */ +/* re_match is like re_match_2 except it takes only a single string. */ + +int +re_match (bufp, string, size, pos, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, pos; + struct re_registers *regs; + { + return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); +} +#endif /* not emacs */ + + +/* re_match_2 matches the compiled pattern in BUFP against the + the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 + and SIZE2, respectively). We start matching at POS, and stop + matching at STOP. + + If REGS is non-null and the `no_sub' field of BUFP is nonzero, we + store offsets for the substring each group matched in REGS. See the + documentation for exactly how many groups we fill. + + We return -1 if no match, -2 if an internal error (such as the + failure stack overflowing). Otherwise, we return the length of the + matched substring. */ + +int +re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + /* General temporaries. */ + int mcnt; + unsigned char *p1; + + /* Just past the end of the corresponding string. */ + const char *end1, *end2; + + /* Pointers into string1 and string2, just past the last characters in + each to consider matching. */ + const char *end_match_1, *end_match_2; + + /* Where we are in the data, and the end of the current string. */ + const char *d, *dend; + + /* Where we are in the pattern, and the end of the pattern. */ + unsigned char *p = bufp->buffer; + register unsigned char *pend = p + bufp->used; + + /* We use this to map every character in the string. */ + char *translate = bufp->translate; + + /* Failure point stack. Each place that can handle a failure further + down the line pushes a failure point on this stack. It consists of + restart, regend, and reg_info for all registers corresponding to + the subexpressions we're currently inside, plus the number of such + registers, and, finally, two char *'s. The first char * is where + to resume scanning the pattern; the second one is where to resume + scanning the strings. If the latter is zero, the failure point is + a ``dummy''; if a failure happens and the failure point is a dummy, + it gets discarded and the next next one is tried. */ + fail_stack_type fail_stack; +#ifdef DEBUG + static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + + /* We fill all the registers internally, independent of what we + return, for use in backreferences. The number here includes + an element for register zero. */ + unsigned num_regs = bufp->re_nsub + 1; + + /* The currently active registers. */ + unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; + unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; + + /* Information on the contents of registers. These are pointers into + the input strings; they record just what was matched (on this + attempt) by a subexpression part of the pattern, that is, the + regnum-th regstart pointer points to where in the pattern we began + matching and the regnum-th regend points to right after where we + stopped matching the regnum-th subexpression. (The zeroth register + keeps track of what the whole pattern matches.) */ + const char **regstart = NULL, **regend = NULL; + + /* If a group that's operated upon by a repetition operator fails to + match anything, then the register for its start will need to be + restored because it will have been set to wherever in the string we + are when we last see its open-group operator. Similarly for a + register's end. */ + const char **old_regstart = NULL, **old_regend = NULL; + + /* The is_active field of reg_info helps us keep track of which (possibly + nested) subexpressions we are currently in. The matched_something + field of reg_info[reg_num] helps us tell whether or not we have + matched any of the pattern so far this time through the reg_num-th + subexpression. These two fields get reset each time through any + loop their register is in. */ + register_info_type *reg_info = NULL; + + /* The following record the register info as found in the above + variables when we find a match better than any we've seen before. + This happens as we backtrack through the failure points, which in + turn happens only if we have not yet matched the entire string. */ + unsigned best_regs_set = false; + const char **best_regstart = NULL, **best_regend = NULL; + + /* Logically, this is `best_regend[0]'. But we don't want to have to + allocate space for that if we're not allocating space for anything + else (see below). Also, we never need info about register 0 for + any of the other register vectors, and it seems rather a kludge to + treat `best_regend' differently than the rest. So we keep track of + the end of the best match so far in a separate variable. We + initialize this to NULL so that when we backtrack the first time + and need to test it, it's not garbage. */ + const char *match_end = NULL; + + /* Used when we pop values we don't care about. */ + const char **reg_dummy = NULL; + register_info_type *reg_info_dummy = NULL; + +#ifdef DEBUG + /* Counts the total number of registers pushed. */ + unsigned num_regs_pushed = 0; +#endif + + DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + + INIT_FAIL_STACK (); + + /* Do not bother to initialize all the register variables if there are + no groups in the pattern, as it takes a fair amount of time. If + there are groups, we include space for register 0 (the whole + pattern), even though we never use it, since it simplifies the + array indexing. We should fix this. */ + if (bufp->re_nsub) + { + regstart = REGEX_TALLOC (num_regs, const char *); + regend = REGEX_TALLOC (num_regs, const char *); + old_regstart = REGEX_TALLOC (num_regs, const char *); + old_regend = REGEX_TALLOC (num_regs, const char *); + best_regstart = REGEX_TALLOC (num_regs, const char *); + best_regend = REGEX_TALLOC (num_regs, const char *); + reg_info = REGEX_TALLOC (num_regs, register_info_type); + reg_dummy = REGEX_TALLOC (num_regs, const char *); + reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); + + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + { + FREE_VARIABLES (); + return -2; + } + } +#ifdef REGEX_MALLOC + else + { + /* We must initialize all our variables to NULL, so that + `FREE_VARIABLES' doesn't try to free them. */ + regstart = regend = old_regstart = old_regend = best_regstart + = best_regend = reg_dummy = NULL; + reg_info = reg_info_dummy = (register_info_type *) NULL; + } +#endif /* REGEX_MALLOC */ + + /* The starting position is bogus. */ + if (pos < 0 || pos > size1 + size2) + { + FREE_VARIABLES (); + return -1; + } + + /* Initialize subexpression text positions to -1 to mark ones that no + start_memory/stop_memory has been seen for. Also initialize the + register information struct. */ + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = regend[mcnt] + = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; + IS_ACTIVE (reg_info[mcnt]) = 0; + MATCHED_SOMETHING (reg_info[mcnt]) = 0; + EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; + } + + /* We move `string1' into `string2' if the latter's empty -- but not if + `string1' is null. */ + if (size2 == 0 && string1 != NULL) + { + string2 = string1; + size2 = size1; + string1 = 0; + size1 = 0; + } + end1 = string1 + size1; + end2 = string2 + size2; + + /* Compute where to stop matching, within the two strings. */ + if (stop <= size1) + { + end_match_1 = string1 + stop; + end_match_2 = string2; + } + else + { + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + + /* `p' scans through the pattern as `d' scans through the data. + `dend' is the end of the input string that `d' points within. `d' + is advanced into the following input string whenever necessary, but + this happens before fetching; therefore, at the beginning of the + loop, `d' can be pointing at the end of a string, but it cannot + equal `string2'. */ + if (size1 > 0 && pos <= size1) + { + d = string1 + pos; + dend = end_match_1; + } + else + { + d = string2 + pos - size1; + dend = end_match_2; + } + + DEBUG_PRINT1 ("The compiled pattern is: "); + DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_PRINT1 ("The string to match is: `"); + DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_PRINT1 ("'\n"); + + /* This loops over pattern commands. It exits by returning from the + function if the match is complete, or it drops through if the match + fails at this starting point in the input data. */ + for (;;) + { + DEBUG_PRINT2 ("\n0x%x: ", p); + + if (p == pend) + { /* End of pattern means we might have succeeded. */ + DEBUG_PRINT1 ("end of pattern ... "); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ + if (d != end_match_2) + { + DEBUG_PRINT1 ("backtracking.\n"); + + if (!FAIL_STACK_EMPTY ()) + { /* More failure points to try. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + + /* If exceeds best match so far, save it. */ + if (!best_regs_set + || (same_str_p && d > match_end) + || (!same_str_p && !MATCHING_IN_FIRST_STRING)) + { + best_regs_set = true; + match_end = d; + + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + best_regstart[mcnt] = regstart[mcnt]; + best_regend[mcnt] = regend[mcnt]; + } + } + goto fail; + } + + /* If no failure points, don't restore garbage. */ + else if (best_regs_set) + { + restore_best_regs: + /* Restore best match. It may happen that `dend == + end_match_1' while the restored d is in string2. + For example, the pattern `x.*y.*z' against the + strings `x-' and `y-z-', if the two strings are + not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + + d = match_end; + dend = ((d >= string1 && d <= end1) + ? end_match_1 : end_match_2); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = best_regstart[mcnt]; + regend[mcnt] = best_regend[mcnt]; + } + } + } /* d != end_match_2 */ + + DEBUG_PRINT1 ("Accepting match.\n"); + + /* If caller wants register contents data back, do it. */ + if (regs && !bufp->no_sub) + { + /* Have the register data arrays been allocated? */ + if (bufp->regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. We need one + extra element beyond `num_regs' for the `-1' marker + GNU code uses. */ + regs->num_regs = MAX (RE_NREGS, num_regs + 1); + regs->start = TALLOC (regs->num_regs, regoff_t); + regs->end = TALLOC (regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + bufp->regs_allocated = REGS_REALLOCATE; + } + else if (bufp->regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (regs->num_regs < num_regs + 1) + { + regs->num_regs = num_regs + 1; + RETALLOC (regs->start, regs->num_regs, regoff_t); + RETALLOC (regs->end, regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + } + } + else + assert (bufp->regs_allocated == REGS_FIXED); + + /* Convert the pointer data in `regstart' and `regend' to + indices. Register zero has to be set differently, + since we haven't kept track of any info for it. */ + if (regs->num_regs > 0) + { + regs->start[0] = pos; + regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 + : d - string2 + size1); + } + + /* Go through the first `min (num_regs, regs->num_regs)' + registers, since that is all we initialized. */ + for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) + { + if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) + regs->start[mcnt] = regs->end[mcnt] = -1; + else + { + regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + } + } + + /* If the regs structure we return has more elements than + were in the pattern, set the extra elements to -1. If + we (re)allocated the registers, this is the case, + because we always allocate enough to have at least one + -1 at the end. */ + for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + } /* regs && !bufp->no_sub */ + + FREE_VARIABLES (); + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 + : string2 - size1); + + DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + + return mcnt; + } + + /* Otherwise match next pattern command. */ +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + /* Ignore these. Used to ignore the n of succeed_n's which + currently have n == 0. */ + case no_op: + DEBUG_PRINT1 ("EXECUTING no_op.\n"); + break; + + + /* Match the next n pattern characters exactly. The following + byte in the pattern defines n, and the n bytes after that + are the characters to match. */ + case exactn: + mcnt = *p++; + DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + + /* This is written out as an if-else so we don't waste time + testing `translate' inside the loop. */ + if (translate) + { + do + { + PREFETCH (); + if (translate[(unsigned char) *d++] != (char) *p++) + goto fail; + } + while (--mcnt); + } + else + { + do + { + PREFETCH (); + if (*d++ != (char) *p++) goto fail; + } + while (--mcnt); + } + SET_REGS_MATCHED (); + break; + + + /* Match any character except possibly a newline or a null. */ + case anychar: + DEBUG_PRINT1 ("EXECUTING anychar.\n"); + + PREFETCH (); + + if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') + || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%d'.\n", *d); + d++; + break; + + + case charset: + case charset_not: + { + register unsigned char c; + boolean not = (re_opcode_t) *(p - 1) == charset_not; + + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + + PREFETCH (); + c = TRANSLATE (*d); /* The character to match. */ + + /* Cast to `unsigned' instead of `unsigned char' in case the + bit list is a full 32 bytes long. */ + if (c < (unsigned) (*p * BYTEWIDTH) + && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + p += 1 + *p; + + if (!not) goto fail; + + SET_REGS_MATCHED (); + d++; + break; + } + + + /* The beginning of a group is represented by start_memory. + The arguments are the register number in the next byte, and the + number of groups inner to this one in the next. The text + matched within the group is recorded (in the internal + registers data structure) under the register number. */ + case start_memory: + DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + + /* Find out if this group can match the empty string. */ + p1 = p; /* To send to group_match_null_string_p. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[*p]) + = group_match_null_string_p (&p1, pend, reg_info); + + /* Save the position in the string where we were the last time + we were at this open-group operator in case the group is + operated upon by a repetition operator, e.g., with `(a*)*b' + against `ab'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regstart[*p]) ? d : regstart[*p] + : regstart[*p]; + DEBUG_PRINT2 (" old_regstart: %d\n", + POINTER_TO_OFFSET (old_regstart[*p])); + + regstart[*p] = d; + DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + + IS_ACTIVE (reg_info[*p]) = 1; + MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* This is the new highest active register. */ + highest_active_reg = *p; + + /* If nothing was active before, this is the new lowest active + register. */ + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *p; + + /* Move past the register number and inner group count. */ + p += 2; + break; + + + /* The stop_memory opcode represents the end of a group. Its + arguments are the same as start_memory's: the register + number, and the number of inner groups. */ + case stop_memory: + DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + + /* We need to save the string position the last time we were at + this close-group operator in case the group is operated + upon by a repetition operator, e.g., with `((a*)*(b*)*)*' + against `aba'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regend[*p]) ? d : regend[*p] + : regend[*p]; + DEBUG_PRINT2 (" old_regend: %d\n", + POINTER_TO_OFFSET (old_regend[*p])); + + regend[*p] = d; + DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + + /* This register isn't active anymore. */ + IS_ACTIVE (reg_info[*p]) = 0; + + /* If this was the only register active, nothing is active + anymore. */ + if (lowest_active_reg == highest_active_reg) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + { /* We must scan for the new highest active register, since + it isn't necessarily one less than now: consider + (a(b)c(d(e)f)g). When group 3 ends, after the f), the + new highest active register is 1. */ + unsigned char r = *p - 1; + while (r > 0 && !IS_ACTIVE (reg_info[r])) + r--; + + /* If we end up at register zero, that means that we saved + the registers as the result of an `on_failure_jump', not + a `start_memory', and we jumped to past the innermost + `stop_memory'. For example, in ((.)*) we save + registers 1 and 2 as a result of the *, but when we pop + back to the second ), we are at the stop_memory 1. + Thus, nothing is active. */ + if (r == 0) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + highest_active_reg = r; + } + + /* If just failed to match something this time around with a + group that's operated on by a repetition operator, try to + force exit from the ``loop'', and restore the register + information for this group that we had before trying this + last match. */ + if ((!MATCHED_SOMETHING (reg_info[*p]) + || (re_opcode_t) p[-3] == start_memory) + && (p + 2) < pend) + { + boolean is_a_jump_n = false; + + p1 = p + 2; + mcnt = 0; + switch ((re_opcode_t) *p1++) + { + case jump_n: + is_a_jump_n = true; + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (is_a_jump_n) + p1 += 2; + break; + + default: + /* do nothing */ ; + } + p1 += mcnt; + + /* If the next operation is a jump backwards in the pattern + to an on_failure_jump right before the start_memory + corresponding to this stop_memory, exit from the loop + by forcing a failure after pushing on the stack the + on_failure_jump's jump in the pattern, and d. */ + if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump + && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) + { + /* If this group ever matched anything, then restore + what its registers were before trying this last + failed match, e.g., with `(a*)*b' against `ab' for + regstart[1], and, e.g., with `((a*)*(b*)*)*' + against `aba' for regend[3]. + + Also restore the registers for inner groups for, + e.g., `((a*)(b*))*' against `aba' (register 3 would + otherwise get trashed). */ + + if (EVER_MATCHED_SOMETHING (reg_info[*p])) + { + unsigned r; + + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Restore this and inner groups' (if any) registers. */ + for (r = *p; r < *p + *(p + 1); r++) + { + regstart[r] = old_regstart[r]; + + /* xx why this test? */ + if ((int) old_regend[r] >= (int) regstart[r]) + regend[r] = old_regend[r]; + } + } + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + PUSH_FAILURE_POINT (p1 + mcnt, d, -2); + + goto fail; + } + } + + /* Move past the register number and the inner group count. */ + p += 2; + break; + + + /* \ has been turned into a `duplicate' command which is + followed by the numeric value of as the register number. */ + case duplicate: + { + register const char *d2, *dend2; + int regno = *p++; /* Get which register to match against. */ + DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + + /* Can't back reference a group which we've never matched. */ + if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + goto fail; + + /* Where in input to try to start matching. */ + d2 = regstart[regno]; + + /* Where to stop matching; if both the place to start and + the place to stop matching are in the same string, then + set to the place to stop, otherwise, for now have to use + the end of the first string. */ + + dend2 = ((FIRST_STRING_P (regstart[regno]) + == FIRST_STRING_P (regend[regno])) + ? regend[regno] : end_match_1); + for (;;) + { + /* If necessary, advance to next segment in register + contents. */ + while (d2 == dend2) + { + if (dend2 == end_match_2) break; + if (dend2 == regend[regno]) break; + + /* End of string1 => advance to string2. */ + d2 = string2; + dend2 = regend[regno]; + } + /* At end of register contents => success */ + if (d2 == dend2) break; + + /* If necessary, advance to next segment in data. */ + PREFETCH (); + + /* How many characters left in this segment to match. */ + mcnt = dend - d; + + /* Want how many consecutive characters we can match in + one shot, so, if necessary, adjust the count. */ + if (mcnt > dend2 - d2) + mcnt = dend2 - d2; + + /* Compare that many; failure if mismatch, else move + past them. */ + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : bcmp (d, d2, mcnt)) + goto fail; + d += mcnt, d2 += mcnt; + } + } + break; + + + /* begline matches the empty string at the beginning of the string + (unless `not_bol' is set in `bufp'), and, if + `newline_anchor' is set, after newlines. */ + case begline: + DEBUG_PRINT1 ("EXECUTING begline.\n"); + + if (AT_STRINGS_BEG (d)) + { + if (!bufp->not_bol) break; + } + else if (d[-1] == '\n' && bufp->newline_anchor) + { + break; + } + /* In all other cases, we fail. */ + goto fail; + + + /* endline is the dual of begline. */ + case endline: + DEBUG_PRINT1 ("EXECUTING endline.\n"); + + if (AT_STRINGS_END (d)) + { + if (!bufp->not_eol) break; + } + + /* We have to ``prefetch'' the next character. */ + else if ((d == end1 ? *string2 : *d) == '\n' + && bufp->newline_anchor) + { + break; + } + goto fail; + + + /* Match at the very beginning of the data. */ + case begbuf: + DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + if (AT_STRINGS_BEG (d)) + break; + goto fail; + + + /* Match at the very end of the data. */ + case endbuf: + DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + if (AT_STRINGS_END (d)) + break; + goto fail; + + + /* on_failure_keep_string_jump is used to optimize `.*\n'. It + pushes NULL as the value for the string on the stack. Then + `pop_failure_point' will keep the current value for the + string, instead of restoring it. To see why, consider + matching `foo\nbar' against `.*\n'. The .* matches the foo; + then the . fails against the \n. But the next thing we want + to do is match the \n against the \n; if we restored the + string value, we would be back at the foo. + + Because this is used only in specific cases, we don't need to + check all the things that `on_failure_jump' does, to make + sure the right things get saved on the stack. Hence we don't + share its code. The only reason to push anything on the + stack at all is that otherwise we would have to change + `anychar's code to do something besides goto fail in this + case; that seems worse than this. */ + case on_failure_keep_string_jump: + DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); + + PUSH_FAILURE_POINT (p + mcnt, NULL, -2); + break; + + + /* Uses of on_failure_jump: + + Each alternative starts with an on_failure_jump that points + to the beginning of the next alternative. Each alternative + except the last ends with a jump that in effect jumps past + the rest of the alternatives. (They really jump to the + ending jump of the following alternative, because tensioning + these jumps is a hassle.) + + Repeats start with an on_failure_jump that points past both + the repetition text and either the following jump or + pop_failure_jump back to this on_failure_jump. */ + case on_failure_jump: + on_failure: + DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); + + /* If this on_failure_jump comes right before a group (i.e., + the original * applied to a group), save the information + for that group and all inner ones, so that if we fail back + to this point, the group's information will be correct. + For example, in \(a*\)*\1, we need the preceding group, + and in \(\(a*\)b*\)\2, we need the inner group. */ + + /* We can't use `p' to check ahead because we push + a failure point to `p + mcnt' after we do this. */ + p1 = p; + + /* We need to skip no_op's before we look for the + start_memory in case this on_failure_jump is happening as + the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 + against aba. */ + while (p1 < pend && (re_opcode_t) *p1 == no_op) + p1++; + + if (p1 < pend && (re_opcode_t) *p1 == start_memory) + { + /* We have a new highest active register now. This will + get reset at the start_memory we are about to get to, + but we will have saved all the registers relevant to + this repetition op, as described above. */ + highest_active_reg = *(p1 + 1) + *(p1 + 2); + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *(p1 + 1); + } + + DEBUG_PRINT1 (":\n"); + PUSH_FAILURE_POINT (p + mcnt, d, -2); + break; + + + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ + case maybe_pop_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + { + register unsigned char *p2 = p; + + /* Compare the beginning of the repeat with what in the + pattern follows its end. If we can establish that there + is nothing that they would both match, i.e., that we + would have to backtrack because of (as in, e.g., `a*a') + then we can change to pop_failure_jump, because we'll + never have to backtrack. + + This is not true in the case of alternatives: in + `(a|ab)*' we do need to backtrack to the `ab' alternative + (e.g., if the string was `ab'). But instead of trying to + detect that here, the alternative has put on a dummy + failure point which is what we will end up popping. */ + + /* Skip over open/close-group commands. */ + while (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; /* Skip over args, too. */ + + /* If we're at the end of the pattern, we can change. */ + if (p2 == pend) + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. */ + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 + (" End of pattern: change to `pop_failure_jump'.\n"); + } + + else if ((re_opcode_t) *p2 == exactn + || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + p1 = p + mcnt; + + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ + if ((re_opcode_t) p1[3] == exactn && p1[5] != c) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset + || (re_opcode_t) p1[3] == charset_not) + { + int not = (re_opcode_t) p1[3] == charset_not; + + if (c < (unsigned char) (p1[4] * BYTEWIDTH) + && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + /* `not' is equal to 1 if c would match, which means + that we can't change to pop_failure_jump. */ + if (!not) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + } + p -= 2; /* Point at relative address again. */ + if ((re_opcode_t) p[-1] != pop_failure_jump) + { + p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); + goto unconditional_jump; + } + /* Note fall through. */ + + + /* The end of a simple repeat has a pop_failure_jump back to + its matching on_failure_jump, where the latter will push a + failure point. The pop_failure_jump takes off failure + points put on by this pop_failure_jump's matching + on_failure_jump; we got through the pattern to here from the + matching on_failure_jump, so didn't fail. */ + case pop_failure_jump: + { + /* We need to pass separate storage for the lowest and + highest registers, even though we don't care about the + actual values. Otherwise, we will restore only one + register from the stack, since lowest will == highest in + `pop_failure_point'. */ + unsigned dummy_low_reg, dummy_high_reg; + unsigned char *pdummy; + const char *sdummy; + + DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + POP_FAILURE_POINT (sdummy, pdummy, + dummy_low_reg, dummy_high_reg, + reg_dummy, reg_dummy, reg_info_dummy); + } + /* Note fall through. */ + + + /* Unconditionally jump (without popping any failure points). */ + case jump: + unconditional_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ + DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + p += mcnt; /* Do the jump. */ + DEBUG_PRINT2 ("(to 0x%x).\n", p); + break; + + + /* We need this opcode so we can detect where alternatives end + in `group_match_null_string_p' et al. */ + case jump_past_alt: + DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + goto unconditional_jump; + + + /* Normally, the on_failure_jump pushes a failure point, which + then gets popped at pop_failure_jump. We will end up at + pop_failure_jump, also, and with a pattern of, say, `a+', we + are skipping over the on_failure_jump, so we have to push + something meaningless for pop_failure_jump to pop. */ + case dummy_failure_jump: + DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + /* It doesn't matter what we push for the string here. What + the code at `fail' tests is the value for the pattern. */ + PUSH_FAILURE_POINT (0, 0, -2); + goto unconditional_jump; + + + /* At the end of an alternative, we need to push a dummy failure + point in case we are followed by a `pop_failure_jump', because + we don't want the failure point for the alternative to be + popped. For example, matching `(a|ab)*' against `aab' + requires that we match the `ab' alternative. */ + case push_dummy_failure: + DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + /* See comments just above at `dummy_failure_jump' about the + two zeroes. */ + PUSH_FAILURE_POINT (0, 0, -2); + break; + + /* Have to succeed matching what follows at least n times. + After that, handle like `on_failure_jump'. */ + case succeed_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + + assert (mcnt >= 0); + /* Originally, this is how many times we HAVE to succeed. */ + if (mcnt > 0) + { + mcnt--; + p += 2; + STORE_NUMBER_AND_INCR (p, mcnt); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); + } + else if (mcnt == 0) + { + DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); + p[2] = (unsigned char) no_op; + p[3] = (unsigned char) no_op; + goto on_failure; + } + break; + + case jump_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + + /* Originally, this is how many times we CAN jump. */ + if (mcnt) + { + mcnt--; + STORE_NUMBER (p + 2, mcnt); + goto unconditional_jump; + } + /* If don't have to jump any more, skip over the rest of command. */ + else + p += 4; + break; + + case set_number_at: + { + DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p1 = p + mcnt; + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); + STORE_NUMBER (p1, mcnt); + break; + } + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + break; + goto fail; + + case notwordbound: + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + goto fail; + break; + + case wordbeg: + DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) + break; + goto fail; + + case wordend: + DEBUG_PRINT1 ("EXECUTING wordend.\n"); + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) + break; + goto fail; + +#ifdef emacs +#ifdef emacs19 + case before_dot: + DEBUG_PRINT1 ("EXECUTING before_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) >= point) + goto fail; + break; + + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) != point) + goto fail; + break; + + case after_dot: + DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) <= point) + goto fail; + break; +#else /* not emacs19 */ + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) + goto fail; + break; +#endif /* not emacs19 */ + + case syntaxspec: + DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchsyntax; + + case wordchar: + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + mcnt = (int) Sword; + matchsyntax: + PREFETCH (); + if (SYNTAX (*d++) != (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + + case notsyntaxspec: + DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchnotsyntax; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + mcnt = (int) Sword; + matchnotsyntax: + PREFETCH (); + if (SYNTAX (*d++) == (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + +#else /* not emacs */ + case wordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + PREFETCH (); + if (!WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + PREFETCH (); + if (WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; +#endif /* not emacs */ + + default: + abort (); + } + continue; /* Successfully executed one pattern command; keep going. */ + + + /* We goto here if a matching operation fails. */ + fail: + if (!FAIL_STACK_EMPTY ()) + { /* A restart point is known. Restore to that state. */ + DEBUG_PRINT1 ("\nFAIL:\n"); + POP_FAILURE_POINT (d, p, + lowest_active_reg, highest_active_reg, + regstart, regend, reg_info); + + /* If this failure point is a dummy, try the next one. */ + if (!p) + goto fail; + + /* If we failed to the end of the pattern, don't examine *p. */ + assert (p <= pend); + if (p < pend) + { + boolean is_a_jump_n = false; + + /* If failed to a backwards jump that's part of a repetition + loop, need to pop this failure point and use the next one. */ + switch ((re_opcode_t) *p) + { + case jump_n: + is_a_jump_n = true; + case maybe_pop_jump: + case pop_failure_jump: + case jump: + p1 = p + 1; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + + if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) + || (!is_a_jump_n + && (re_opcode_t) *p1 == on_failure_jump)) + goto fail; + break; + default: + /* do nothing */ ; + } + } + + if (d >= string1 && d <= end1) + dend = end_match_1; + } + else + break; /* Matching at this starting point really fails. */ + } /* for (;;) */ + + if (best_regs_set) + goto restore_best_regs; + + FREE_VARIABLES (); + + return -1; /* Failure to match. */ +} /* re_match_2 */ + +/* Subroutine definitions for re_match_2. */ + + +/* We are passed P pointing to a register number after a start_memory. + + Return true if the pattern up to the corresponding stop_memory can + match the empty string, and false otherwise. + + If we find the matching stop_memory, sets P to point to one past its number. + Otherwise, sets P to an undefined byte less than or equal to END. + + We don't handle duplicates properly (yet). */ + +static boolean +group_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + /* Point to after the args to the start_memory. */ + unsigned char *p1 = *p + 2; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return true or + false, as appropriate, when we get to one that can't, or to the + matching stop_memory. */ + + switch ((re_opcode_t) *p1) + { + /* Could be either a loop or a series of alternatives. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + /* If the next operation is not a jump backwards in the + pattern. */ + + if (mcnt >= 0) + { + /* Go through the on_failure_jumps of the alternatives, + seeing if any of the alternatives cannot match nothing. + The last alternative starts with only a jump, + whereas the rest start with on_failure_jump and end + with a jump, e.g., here is the pattern for `a|b|c': + + /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 + /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 + /exactn/1/c + + So, we have to first go through the first (n-1) + alternatives and then deal with the last one separately. */ + + + /* Deal with the first (n-1) alternatives, which start + with an on_failure_jump (see above) that jumps to right + past a jump_past_alt. */ + + while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) + { + /* `mcnt' holds how many bytes long the alternative + is, including the ending `jump_past_alt' and + its number. */ + + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + reg_info)) + return false; + + /* Move to right after this alternative, including the + jump_past_alt. */ + p1 += mcnt; + + /* Break if it's the beginning of an n-th alternative + that doesn't begin with an on_failure_jump. */ + if ((re_opcode_t) *p1 != on_failure_jump) + break; + + /* Still have to check that it's not an n-th + alternative that starts with an on_failure_jump. */ + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) + { + /* Get to the beginning of the n-th alternative. */ + p1 -= 3; + break; + } + } + + /* Deal with the last alternative: go back and get number + of the `jump_past_alt' just before it. `mcnt' contains + the length of the alternative. */ + EXTRACT_NUMBER (mcnt, p1 - 2); + + if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) + return false; + + p1 += mcnt; /* Get past the n-th alternative. */ + } /* if mcnt > 0 */ + break; + + + case stop_memory: + assert (p1[1] == **p); + *p = p1 + 2; + return true; + + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return false; +} /* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: + It expects P to be the first byte of a single alternative and END one + byte past the last. The alternative can contain groups. */ + +static boolean +alt_match_null_string_p (p, end, reg_info) + unsigned char *p, *end; + register_info_type *reg_info; +{ + int mcnt; + unsigned char *p1 = p; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and break when we get + to one that can't. */ + + switch ((re_opcode_t) *p1) + { + /* It's a loop. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + break; + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return true; +} /* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and + alt_match_null_string_p. + + Sets P to one after the op and its arguments, if any. */ + +static boolean +common_op_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + boolean ret; + int reg_no; + unsigned char *p1 = *p; + + switch ((re_opcode_t) *p1++) + { + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbeg: + case wordend: + case wordbound: + case notwordbound: +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif + break; + + case start_memory: + reg_no = *p1; + assert (reg_no > 0 && reg_no <= MAX_REGNUM); + ret = group_match_null_string_p (&p1, end, reg_info); + + /* Have to set this here in case we're checking a group which + contains a group and a back reference to it. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; + + if (!ret) + return false; + break; + + /* If this is an optimized succeed_n for zero times, make the jump. */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (mcnt >= 0) + p1 += mcnt; + else + return false; + break; + + case succeed_n: + /* Get to the number of times to succeed. */ + p1 += 2; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + if (mcnt == 0) + { + p1 -= 4; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + } + else + return false; + break; + + case duplicate: + if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) + return false; + break; + + case set_number_at: + p1 += 4; + + default: + /* All other opcodes mean we cannot match the empty string. */ + return false; + } + + *p = p1; + return true; +} /* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN + bytes; nonzero otherwise. */ + +static int +bcmp_translate( + unsigned char *s1, + unsigned char *s2, + int len, + char *translate +) +{ + register unsigned char *p1 = s1, *p2 = s2; + while (len) + { + if (translate[*p1++] != translate[*p2++]) return 1; + len--; + } + return 0; +} + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length SIZE) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. + + We call regex_compile to do the actual compilation. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + int length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* GNU code is written to assume at least RE_NREGS registers will be set + (and at least one extra will be -1). */ + bufp->regs_allocated = REGS_UNALLOCATED; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub. */ + bufp->no_sub = 0; + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = regex_compile (pattern, length, re_syntax_options, bufp); + + return re_error_msg[(int) ret]; +} + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them if this is an Emacs or POSIX compilation. */ + +#if !defined (emacs) && !defined (_POSIX_SOURCE) + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + + if (!s) + { + if (!re_comp_buf.buffer) + return "No previous regular expression"; + return 0; + } + + if (!re_comp_buf.buffer) + { + re_comp_buf.buffer = (unsigned char *) malloc (200); + if (re_comp_buf.buffer == NULL) + return "Memory exhausted"; + re_comp_buf.allocated = 200; + + re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); + if (re_comp_buf.fastmap == NULL) + return "Memory exhausted"; + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); + + /* Yes, we're discarding `const' here. */ + return (char *) re_error_msg[(int) ret]; +} + + +int +re_exec (s) + const char *s; +{ + const int len = strlen (s); + return + 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); +} +#endif /* not emacs and not _POSIX_SOURCE */ + +/* POSIX.2 functions. Don't define these for Emacs. */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' and `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *preg; + const char *pattern; + int cflags; +{ + reg_errcode_t ret; + unsigned syntax + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + + /* regex_compile will allocate the space for the compiled pattern. */ + preg->buffer = 0; + preg->allocated = 0; + + /* Don't bother to use a fastmap when searching. This simplifies the + REG_NEWLINE case: if we used a fastmap, we'd have to put all the + characters after newlines into the fastmap. This way, we just try + every character. */ + preg->fastmap = 0; + + if (cflags & REG_ICASE) + { + unsigned i; + + preg->translate = (char *) malloc (CHAR_SET_SIZE); + if (preg->translate == NULL) + return (int) REG_ESPACE; + + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + } + else + preg->translate = NULL; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + + preg->no_sub = !!(cflags & REG_NOSUB); + + /* POSIX says a null character in the pattern terminates it, so we + can use strlen here in compiling the pattern. */ + ret = regex_compile (pattern, strlen (pattern), syntax, preg); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) ret = REG_EPAREN; + + return (int) ret; +} + + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *preg; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + int ret; + struct re_registers regs; + regex_t private_preg; + int len = strlen (string); + boolean want_reg_info = !preg->no_sub && nmatch > 0; + + private_preg = *preg; + + private_preg.not_bol = !!(eflags & REG_NOTBOL); + private_preg.not_eol = !!(eflags & REG_NOTEOL); + + /* The user has told us exactly how many registers to return + information about, via `nmatch'. We have to pass that on to the + matching routines. */ + private_preg.regs_allocated = REGS_FIXED; + + if (want_reg_info) + { + regs.num_regs = nmatch; + regs.start = TALLOC (nmatch, regoff_t); + regs.end = TALLOC (nmatch, regoff_t); + if (regs.start == NULL || regs.end == NULL) + return (int) REG_NOMATCH; + } + + /* Perform the searching operation. */ + ret = re_search (&private_preg, string, len, + /* start: */ 0, /* range: */ len, + want_reg_info ? ®s : (struct re_registers *) 0); + + /* Copy the register information to the POSIX structure. */ + if (want_reg_info) + { + if (ret >= 0) + { + unsigned r; + + for (r = 0; r < nmatch; r++) + { + pmatch[r].rm_so = regs.start[r]; + pmatch[r].rm_eo = regs.end[r]; + } + } + + /* If we needed the temporary register info, free the space now. */ + free (regs.start); + free (regs.end); + } + + /* We want zero return to mean success, unlike `re_search'. */ + return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; +} + + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *preg; + char *errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = re_error_msg[errcode]; + + /* POSIX doesn't require that we do anything in this case, but why + not be nice. */ + if (! msg) + msg = "Success"; + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (errbuf_size != 0) + { + if (msg_size > errbuf_size) + { + strncpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + strcpy (errbuf, msg); + } + + return msg_size; +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + if (preg->buffer != NULL) + free (preg->buffer); + preg->buffer = NULL; + + preg->allocated = 0; + preg->used = 0; + + if (preg->fastmap != NULL) + free (preg->fastmap); + preg->fastmap = NULL; + preg->fastmap_accurate = 0; + + if (preg->translate != NULL) + free (preg->translate); + preg->translate = NULL; +} + +#endif /* not emacs */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/compat/regex.h b/compat/regex.h new file mode 100644 index 0000000000..6eb64f1402 --- /dev/null +++ b/compat/regex.h @@ -0,0 +1,490 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. + + Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ + +/* POSIX says that must be included (by the caller) before + . */ + +#ifdef VMS +/* VMS doesn't have `size_t' in , even though POSIX says it + should be there. */ +#include +#endif + + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \ matches . + If not set, then \ is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +#define RE_DUP_MAX ((1 << 15) - 1) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +struct re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + char *translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +typedef struct re_pattern_buffer regex_t; + + +/* search.c (search_buffer) in Emacs needs this one opcode value. It is + defined both in `regex.c' and here. */ +#define RE_EXACTN_VALUE 1 + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* To avoid duplicating every routine declaration -- once with a + prototype (if we are ANSI), and once without (if we aren't) -- we + use the following macro to declare argument types. This + unfortunately clutters up the declarations a bit, but I think it's + worth it. */ + +#if __STDC__ + +#define _RE_ARGS(args) args + +#else /* not __STDC__ */ + +#define _RE_ARGS(args) () + +#endif /* not __STDC__ */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern + _RE_ARGS ((const char *pattern, int length, + struct re_pattern_buffer *buffer)); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +/* 4.2 bsd compatibility. */ +extern char *re_comp _RE_ARGS ((const char *)); +extern int re_exec _RE_ARGS ((const char *)); + +/* POSIX compatibility. */ +extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); +extern int regexec + _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags)); +extern size_t regerror + _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size)); +extern void regfree _RE_ARGS ((regex_t *preg)); + +#endif /* not __REGEXP_LIBRARY_H__ */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ -- cgit v1.2.3 From 58eda02257a89df6e05f504bdf87e578d90f3b5e Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Dec 2007 11:10:54 +0100 Subject: Compile some programs only conditionally. These programs depend on difficult to emulate POSIX functionality. On Windows, we won't compile them. Signed-off-by: Johannes Sixt --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b003e3e60a..95572efee5 100644 --- a/Makefile +++ b/Makefile @@ -273,11 +273,9 @@ EXTRA_PROGRAMS = # ... and all the rest that could be moved out of bindir to gitexecdir PROGRAMS += $(EXTRA_PROGRAMS) -PROGRAMS += git-daemon$X PROGRAMS += git-fast-import$X PROGRAMS += git-fetch-pack$X PROGRAMS += git-hash-object$X -PROGRAMS += git-imap-send$X PROGRAMS += git-index-pack$X PROGRAMS += git-merge-index$X PROGRAMS += git-merge-tree$X @@ -773,6 +771,10 @@ ifdef ZLIB_PATH endif EXTLIBS += -lz +ifndef NO_POSIX_ONLY_PROGRAMS + PROGRAMS += git-daemon$X + PROGRAMS += git-imap-send$X +endif ifndef NO_OPENSSL OPENSSL_LIBSSL = -lssl ifdef OPENSSLDIR -- cgit v1.2.3 From f4626df51f63d53b89ff01de54cbf7558217ea2b Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Dec 2007 21:24:59 +0100 Subject: Add target architecture MinGW. With this change GIT can be compiled and linked using MinGW. Builtins that only read the repository such as the log family and grep already work. Simple stubs are provided for a number of functions that the Windows C runtime does not offer. They will be completed in later patches. However, a fix for the snprintf/vsnprintf replacement is applied here to avoid buffer overflows. Dmitry Kakurin pointed out that access(..., X_OK) would always fails on Vista and suggested the -D__USE_MINGW_ACCESS workaround. Signed-off-by: Johannes Sixt --- Makefile | 28 ++++++++++++ compat/mingw.c | 57 +++++++++++++++++++++++ compat/mingw.h | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ compat/snprintf.c | 19 ++++++-- git-compat-util.h | 13 ++++-- 5 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 compat/mingw.c create mode 100644 compat/mingw.h diff --git a/Makefile b/Makefile index 95572efee5..a7dafa7586 100644 --- a/Makefile +++ b/Makefile @@ -335,6 +335,7 @@ LIB_H += builtin.h LIB_H += cache.h LIB_H += cache-tree.h LIB_H += commit.h +LIB_H += compat/mingw.h LIB_H += csum-file.h LIB_H += decorate.h LIB_H += delta.h @@ -711,6 +712,33 @@ ifeq ($(uname_S),HP-UX) NO_HSTRERROR = YesPlease NO_SYS_SELECT_H = YesPlease endif +ifneq (,$(findstring MINGW,$(uname_S))) + NO_MMAP = YesPlease + NO_PREAD = YesPlease + NO_OPENSSL = YesPlease + NO_CURL = YesPlease + NO_SYMLINK_HEAD = YesPlease + NO_IPV6 = YesPlease + NO_SETENV = YesPlease + NO_UNSETENV = YesPlease + NO_STRCASESTR = YesPlease + NO_STRLCPY = YesPlease + NO_MEMMEM = YesPlease + NEEDS_LIBICONV = YesPlease + OLD_ICONV = YesPlease + NO_C99_FORMAT = YesPlease + NO_STRTOUMAX = YesPlease + NO_MKDTEMP = YesPlease + SNPRINTF_RETURNS_BOGUS = YesPlease + NO_SVN_TESTS = YesPlease + NO_PERL_MAKEMAKER = YesPlease + NO_POSIX_ONLY_PROGRAMS = YesPlease + COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat + COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1 + COMPAT_OBJS += compat/mingw.o compat/fnmatch.o compat/regex.o + EXTLIBS += -lws2_32 + X = .exe +endif ifneq (,$(findstring arm,$(uname_M))) ARM_SHA1 = YesPlease endif diff --git a/compat/mingw.c b/compat/mingw.c new file mode 100644 index 0000000000..075448d245 --- /dev/null +++ b/compat/mingw.c @@ -0,0 +1,57 @@ +#include "../git-compat-util.h" + +unsigned int _CRT_fmode = _O_BINARY; + +unsigned int sleep (unsigned int seconds) +{ + Sleep(seconds*1000); + return 0; +} + +int mkstemp(char *template) +{ + char *filename = mktemp(template); + if (filename == NULL) + return -1; + return open(filename, O_RDWR | O_CREAT, 0600); +} + +int gettimeofday(struct timeval *tv, void *tz) +{ + return -1; +} + +int poll(struct pollfd *ufds, unsigned int nfds, int timeout) +{ + return -1; +} + +struct tm *gmtime_r(const time_t *timep, struct tm *result) +{ + /* gmtime() in MSVCRT.DLL is thread-safe, but not reentrant */ + memcpy(result, gmtime(timep), sizeof(struct tm)); + return result; +} + +struct tm *localtime_r(const time_t *timep, struct tm *result) +{ + /* localtime() in MSVCRT.DLL is thread-safe, but not reentrant */ + memcpy(result, localtime(timep), sizeof(struct tm)); + return result; +} + +struct passwd *getpwuid(int uid) +{ + static struct passwd p; + return &p; +} + +int setitimer(int type, struct itimerval *in, struct itimerval *out) +{ + return -1; +} + +int sigaction(int sig, struct sigaction *in, struct sigaction *out) +{ + return -1; +} diff --git a/compat/mingw.h b/compat/mingw.h new file mode 100644 index 0000000000..e5c0c6ba21 --- /dev/null +++ b/compat/mingw.h @@ -0,0 +1,134 @@ +#include + +/* + * things that are not available in header files + */ + +typedef int pid_t; +#define hstrerror strerror + +#define S_IFLNK 0120000 /* Symbolic link */ +#define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(x) 0 +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IXGRP 0 +#define S_ISGID 0 +#define S_IROTH 0 +#define S_IXOTH 0 + +#define WIFEXITED(x) ((unsigned)(x) < 259) /* STILL_ACTIVE */ +#define WEXITSTATUS(x) ((x) & 0xff) +#define WIFSIGNALED(x) ((unsigned)(x) > 259) + +#define SIGKILL 0 +#define SIGCHLD 0 +#define SIGPIPE 0 +#define SIGHUP 0 +#define SIGQUIT 0 +#define SIGALRM 100 + +#define F_GETFD 1 +#define F_SETFD 2 +#define FD_CLOEXEC 0x1 + +struct passwd { + char *pw_name; + char *pw_gecos; + char *pw_dir; +}; + +struct pollfd { + int fd; /* file descriptor */ + short events; /* requested events */ + short revents; /* returned events */ +}; +#define POLLIN 1 +#define POLLHUP 2 + +typedef void (__cdecl *sig_handler_t)(int); +struct sigaction { + sig_handler_t sa_handler; + unsigned sa_flags; +}; +#define sigemptyset(x) (void)0 +#define SA_RESTART 0 + +struct itimerval { + struct timeval it_value, it_interval; +}; +#define ITIMER_REAL 0 + +#define st_blocks st_size/512 /* will be cleaned up later */ +#define lstat stat + +/* + * trivial stubs + */ + +static inline int readlink(const char *path, char *buf, size_t bufsiz) +{ errno = ENOSYS; return -1; } +static inline int symlink(const char *oldpath, const char *newpath) +{ errno = ENOSYS; return -1; } +static inline int link(const char *oldpath, const char *newpath) +{ errno = ENOSYS; return -1; } +static inline int fchmod(int fildes, mode_t mode) +{ errno = ENOSYS; return -1; } +static inline int fork(void) +{ errno = ENOSYS; return -1; } +static inline unsigned int alarm(unsigned int seconds) +{ return 0; } +static inline int fsync(int fd) +{ return 0; } +static inline int getppid(void) +{ return 1; } +static inline void sync(void) +{} +static inline int getuid() +{ return 1; } +static inline struct passwd *getpwnam(const char *name) +{ return NULL; } +static inline int fcntl(int fd, int cmd, long arg) +{ + if (cmd == F_GETFD || cmd == F_SETFD) + return 0; + errno = EINVAL; + return -1; +} + +/* + * simple adaptors + */ + +static inline int mingw_mkdir(const char *path, int mode) +{ + return mkdir(path); +} +#define mkdir mingw_mkdir + +static inline int waitpid(pid_t pid, unsigned *status, unsigned options) +{ + if (options == 0) + return _cwait(status, pid, 0); + errno = EINVAL; + return -1; +} + + +static inline int pipe(int filedes[2]) +{ return _pipe(filedes, 8192, 0); } + +/* + * implementations of missing functions + */ + +unsigned int sleep (unsigned int seconds); +int mkstemp(char *template); +int gettimeofday(struct timeval *tv, void *tz); +int poll(struct pollfd *ufds, unsigned int nfds, int timeout); +struct tm *gmtime_r(const time_t *timep, struct tm *result); +struct tm *localtime_r(const time_t *timep, struct tm *result); +int getpagesize(void); /* defined in MinGW's libgcc.a */ +struct passwd *getpwuid(int uid); +int setitimer(int type, struct itimerval *in, struct itimerval *out); +int sigaction(int sig, struct sigaction *in, struct sigaction *out); diff --git a/compat/snprintf.c b/compat/snprintf.c index dbfc2d6b6e..580966e56a 100644 --- a/compat/snprintf.c +++ b/compat/snprintf.c @@ -1,12 +1,25 @@ #include "../git-compat-util.h" +/* + * The size parameter specifies the available space, i.e. includes + * the trailing NUL byte; but Windows's vsnprintf expects the + * number of characters to write without the trailing NUL. + */ +#ifndef SNPRINTF_SIZE_CORR +#define SNPRINTF_SIZE_CORR 0 +#endif + #undef vsnprintf int git_vsnprintf(char *str, size_t maxsize, const char *format, va_list ap) { char *s; - int ret; + int ret = -1; - ret = vsnprintf(str, maxsize, format, ap); + if (maxsize > 0) { + ret = vsnprintf(str, maxsize-SNPRINTF_SIZE_CORR, format, ap); + /* Windows does not NUL-terminate if result fills buffer */ + str[maxsize-1] = 0; + } if (ret != -1) return ret; @@ -20,7 +33,7 @@ int git_vsnprintf(char *str, size_t maxsize, const char *format, va_list ap) if (! str) break; s = str; - ret = vsnprintf(str, maxsize, format, ap); + ret = vsnprintf(str, maxsize-SNPRINTF_SIZE_CORR, format, ap); } free(s); return ret; diff --git a/git-compat-util.h b/git-compat-util.h index c04e8baa87..da7249ac1c 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -63,17 +63,18 @@ #include #include #include -#include #include +#include +#include +#include +#ifndef __MINGW32__ +#include #include #include #include -#include #ifndef NO_SYS_SELECT_H #include #endif -#include -#include #include #include #include @@ -89,6 +90,10 @@ #include #define _ALL_SOURCE 1 #endif +#else /* __MINGW32__ */ +/* pull in Windows compatibility stuff */ +#include "compat/mingw.h" +#endif /* __MINGW32__ */ #ifndef NO_ICONV #include -- cgit v1.2.3 From 80ba074f4163dc8ee4232d64e73a8521edcadc1d Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 3 Dec 2007 21:55:57 +0100 Subject: Windows: Use the Windows style PATH separator ';'. Signed-off-by: Johannes Sixt --- Documentation/git.txt | 6 +++--- compat/mingw.h | 6 ++++++ exec_cmd.c | 2 +- git-compat-util.h | 4 ++++ sha1_file.c | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Documentation/git.txt b/Documentation/git.txt index 7414238fe5..4e4bd6ddb1 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -409,9 +409,9 @@ git so take care if using Cogito etc. 'GIT_ALTERNATE_OBJECT_DIRECTORIES':: Due to the immutable nature of git objects, old objects can be archived into shared, read-only directories. This variable - specifies a ":" separated list of git object directories which - can be used to search for git objects. New objects will not be - written to these directories. + specifies a ":" separated (on Windows ";" separated) list + of git object directories which can be used to search for git + objects. New objects will not be written to these directories. 'GIT_DIR':: If the 'GIT_DIR' environment variable is set then it diff --git a/compat/mingw.h b/compat/mingw.h index e5c0c6ba21..22aae0077e 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -132,3 +132,9 @@ int getpagesize(void); /* defined in MinGW's libgcc.a */ struct passwd *getpwuid(int uid); int setitimer(int type, struct itimerval *in, struct itimerval *out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); + +/* + * git specific compatibility + */ + +#define PATH_SEP ';' diff --git a/exec_cmd.c b/exec_cmd.c index e189caca62..a1bc4e04bf 100644 --- a/exec_cmd.c +++ b/exec_cmd.c @@ -37,7 +37,7 @@ static void add_path(struct strbuf *out, const char *path) else strbuf_addstr(out, make_absolute_path(path)); - strbuf_addch(out, ':'); + strbuf_addch(out, PATH_SEP); } } diff --git a/git-compat-util.h b/git-compat-util.h index da7249ac1c..9dc733e56c 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -110,6 +110,10 @@ #define PRIuMAX "llu" #endif +#ifndef PATH_SEP +#define PATH_SEP ':' +#endif + #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) #else diff --git a/sha1_file.c b/sha1_file.c index 191f814e09..6f004ffd09 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -392,7 +392,7 @@ void prepare_alt_odb(void) if (!alt) alt = ""; alt_odb_tail = &alt_odb_list; - link_alt_odb_entries(alt, alt + strlen(alt), ':', NULL, 0); + link_alt_odb_entries(alt, alt + strlen(alt), PATH_SEP, NULL, 0); read_info_alternates(get_object_directory(), 0); } -- cgit v1.2.3 From 4cd148d83f852363363e921c4925e67601654ff6 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Mar 2008 21:11:14 +0100 Subject: setup.c: Prepare for Windows directory separators. This turns two switch/case statements into an if-else-if cascade because we later do not want to have case '/': #ifdef __MINGW32__ case '\\': #endif but use a predicate is_dir_sep(foo) in order to check for the directory separator. Signed-off-by: Johannes Sixt --- setup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/setup.c b/setup.c index d630e374e7..5fc89fd7da 100644 --- a/setup.c +++ b/setup.c @@ -26,24 +26,21 @@ static int sanitary_path_copy(char *dst, const char *src) * (4) "../" -- strip one, eat slash and continue. */ if (c == '.') { - switch (src[1]) { - case '\0': + if (!src[1]) { /* (1) */ src++; - break; - case '/': + } else if (src[1] == '/') { /* (2) */ src += 2; while (*src == '/') src++; continue; - case '.': - switch (src[2]) { - case '\0': + } else if (src[1] == '.') { + if (!src[2]) { /* (3) */ src += 2; goto up_one; - case '/': + } else if (src[2] == '/') { /* (4) */ src += 3; while (*src == '/') -- cgit v1.2.3 From 25fe217b86ca40c53e710d776e120dfa0d81f60b Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 5 Mar 2008 21:51:27 +0100 Subject: Windows: Treat Windows style path names. GIT's guts work with a forward slash as a path separators. We do not change that. Rather we make sure that only "normalized" paths enter the depths of the machinery. We have to translate backslashes to forward slashes in the prefix and in command line arguments. Fortunately, all of them are passed through functions in setup.c. A macro has_dos_drive_path() is defined that checks whether a path begins with a drive letter+colon combination. This predicate is always false on Unix. Another macro is_dir_sep() abstracts that a backslash is also a directory separator on Windows. Signed-off-by: Johannes Sixt --- cache.h | 2 +- compat/mingw.c | 13 +++++++++++++ compat/mingw.h | 9 +++++++++ git-compat-util.h | 8 ++++++++ setup.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/cache.h b/cache.h index 81b7e17de2..72544de4ce 100644 --- a/cache.h +++ b/cache.h @@ -521,7 +521,7 @@ int safe_create_leading_directories(char *path); char *enter_repo(char *path, int strict); static inline int is_absolute_path(const char *path) { - return path[0] == '/'; + return path[0] == '/' || has_dos_drive_prefix(path); } const char *make_absolute_path(const char *path); const char *make_nonrelative_path(const char *path); diff --git a/compat/mingw.c b/compat/mingw.c index 075448d245..4e559bdc93 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -40,6 +40,19 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) return result; } +#undef getcwd +char *mingw_getcwd(char *pointer, int len) +{ + int i; + char *ret = getcwd(pointer, len); + if (!ret) + return ret; + for (i = 0; pointer[i]; i++) + if (pointer[i] == '\\') + pointer[i] = '/'; + return ret; +} + struct passwd *getpwuid(int uid) { static struct passwd p; diff --git a/compat/mingw.h b/compat/mingw.h index 22aae0077e..a954014ff2 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -133,8 +133,17 @@ struct passwd *getpwuid(int uid); int setitimer(int type, struct itimerval *in, struct itimerval *out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); +/* + * replacements of existing functions + */ + +char *mingw_getcwd(char *pointer, int len); +#define getcwd mingw_getcwd + /* * git specific compatibility */ +#define has_dos_drive_prefix(path) (isalpha(*(path)) && (path)[1] == ':') +#define is_dir_sep(c) ((c) == '/' || (c) == '\\') #define PATH_SEP ';' diff --git a/git-compat-util.h b/git-compat-util.h index 9dc733e56c..ab762c79ee 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -114,6 +114,14 @@ #define PATH_SEP ':' #endif +#ifndef has_dos_drive_prefix +#define has_dos_drive_prefix(path) 0 +#endif + +#ifndef is_dir_sep +#define is_dir_sep(c) ((c) == '/') +#endif + #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) #else diff --git a/setup.c b/setup.c index 5fc89fd7da..8bb7b10174 100644 --- a/setup.c +++ b/setup.c @@ -6,11 +6,17 @@ static int inside_work_tree = -1; static int sanitary_path_copy(char *dst, const char *src) { - char *dst0 = dst; + char *dst0; - if (*src == '/') { + if (has_dos_drive_prefix(src)) { + *dst++ = *src++; + *dst++ = *src++; + } + dst0 = dst; + + if (is_dir_sep(*src)) { *dst++ = '/'; - while (*src == '/') + while (is_dir_sep(*src)) src++; } @@ -29,10 +35,10 @@ static int sanitary_path_copy(char *dst, const char *src) if (!src[1]) { /* (1) */ src++; - } else if (src[1] == '/') { + } else if (is_dir_sep(src[1])) { /* (2) */ src += 2; - while (*src == '/') + while (is_dir_sep(*src)) src++; continue; } else if (src[1] == '.') { @@ -40,10 +46,10 @@ static int sanitary_path_copy(char *dst, const char *src) /* (3) */ src += 2; goto up_one; - } else if (src[2] == '/') { + } else if (is_dir_sep(src[2])) { /* (4) */ src += 3; - while (*src == '/') + while (is_dir_sep(*src)) src++; goto up_one; } @@ -51,11 +57,11 @@ static int sanitary_path_copy(char *dst, const char *src) } /* copy up to the next '/', and eat all '/' */ - while ((c = *src++) != '\0' && c != '/') + while ((c = *src++) != '\0' && !is_dir_sep(c)) *dst++ = c; - if (c == '/') { - *dst++ = c; - while (c == '/') + if (is_dir_sep(c)) { + *dst++ = '/'; + while (is_dir_sep(c)) c = *src++; src--; } else if (!c) @@ -74,7 +80,7 @@ static int sanitary_path_copy(char *dst, const char *src) if (dst <= dst0) break; c = *dst--; - if (c == '/') { + if (c == '/') { /* MinGW: cannot be '\\' anymore */ dst += 2; break; } @@ -123,10 +129,23 @@ const char *prefix_path(const char *prefix, int len, const char *path) const char *prefix_filename(const char *pfx, int pfx_len, const char *arg) { static char path[PATH_MAX]; +#ifndef __MINGW32__ if (!pfx || !*pfx || is_absolute_path(arg)) return arg; memcpy(path, pfx, pfx_len); strcpy(path + pfx_len, arg); +#else + char *p; + /* don't add prefix to absolute paths, but still replace '\' by '/' */ + if (is_absolute_path(arg)) + pfx_len = 0; + else + memcpy(path, pfx, pfx_len); + strcpy(path + pfx_len, arg); + for (p = path + pfx_len; *p; p++) + if (*p == '\\') + *p = '/'; +#endif return path; } @@ -360,6 +379,7 @@ const char *setup_git_directory_gently(int *nongit_ok) const char *gitdirenv; const char *gitfile_dir; int len, offset; + int minoffset = 0; /* * Let's assume that we are in a git repository. @@ -410,6 +430,8 @@ const char *setup_git_directory_gently(int *nongit_ok) if (!getcwd(cwd, sizeof(cwd)-1)) die("Unable to read current working directory"); + if (has_dos_drive_prefix(cwd)) + minoffset = 2; /* * Test in the following order (relative to the cwd): @@ -442,7 +464,7 @@ const char *setup_git_directory_gently(int *nongit_ok) } chdir(".."); do { - if (!offset) { + if (offset <= minoffset) { if (nongit_ok) { if (chdir(cwd)) die("Cannot come back to cwd"); @@ -451,7 +473,7 @@ const char *setup_git_directory_gently(int *nongit_ok) } die("Not a git repository"); } - } while (cwd[--offset] != '/'); + } while (offset > minoffset && cwd[--offset] != '/'); } inside_git_dir = 0; -- cgit v1.2.3 From 8385abfda533819be9fbec436230ccd7be4bcda8 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 30 Nov 2007 21:36:00 +0100 Subject: Windows: Handle absolute paths in safe_create_leading_directories(). In this function we must be careful to handle drive-local paths else there is a danger that it runs into an infinite loop. Signed-off-by: Johannes Sixt --- sha1_file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 6f004ffd09..0a54eed761 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -83,14 +83,18 @@ int get_sha1_hex(const char *hex, unsigned char *sha1) return 0; } +static inline int offset_1st_component(const char *path) +{ + if (has_dos_drive_prefix(path)) + return 2 + (path[2] == '/'); + return *path == '/'; +} + int safe_create_leading_directories(char *path) { - char *pos = path; + char *pos = path + offset_1st_component(path); struct stat st; - if (is_absolute_path(path)) - pos++; - while (pos) { pos = strchr(pos, '/'); if (!pos) -- cgit v1.2.3 From 23326d14edbd16469453f6c3ecdd98ab90e6efb7 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 8 Dec 2007 20:57:25 +0100 Subject: Windows: Strip ".exe" from the program name. Before we can successfully parse a builtin command from the program name we must strip off unneeded parts, that is, the file extension. Furthermore, we must take Windows style path names into account when we parse the program name. Signed-off-by: Johannes Sixt --- Makefile | 1 + git-compat-util.h | 4 ++++ git.c | 19 ++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a7dafa7586..9c5aae03bb 100644 --- a/Makefile +++ b/Makefile @@ -735,6 +735,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_POSIX_ONLY_PROGRAMS = YesPlease COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1 + COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/fnmatch.o compat/regex.o EXTLIBS += -lws2_32 X = .exe diff --git a/git-compat-util.h b/git-compat-util.h index ab762c79ee..a9a85be89d 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -114,6 +114,10 @@ #define PATH_SEP ':' #endif +#ifndef STRIP_EXTENSION +#define STRIP_EXTENSION "" +#endif + #ifndef has_dos_drive_prefix #define has_dos_drive_prefix(path) 0 #endif diff --git a/git.c b/git.c index 59f0fcc1f2..871b93ca7e 100644 --- a/git.c +++ b/git.c @@ -369,6 +369,16 @@ static void handle_internal_command(int argc, const char **argv) { "pack-refs", cmd_pack_refs, RUN_SETUP }, }; int i; + static const char ext[] = STRIP_EXTENSION; + + if (sizeof(ext) > 1) { + i = strlen(argv[0]) - strlen(ext); + if (i > 0 && !strcmp(argv[0] + i, ext)) { + char *argv0 = strdup(argv[0]); + argv[0] = cmd = argv0; + argv0[i] = '\0'; + } + } /* Turn "git cmd --help" into "git help cmd" */ if (argc > 1 && !strcmp(argv[1], "--help")) { @@ -386,8 +396,8 @@ static void handle_internal_command(int argc, const char **argv) int main(int argc, const char **argv) { - const char *cmd = argv[0] ? argv[0] : "git-help"; - char *slash = strrchr(cmd, '/'); + const char *cmd = argv[0] && *argv[0] ? argv[0] : "git-help"; + char *slash = (char *)cmd + strlen(cmd); const char *cmd_path = NULL; int done_alias = 0; @@ -396,7 +406,10 @@ int main(int argc, const char **argv) * name, and the dirname as the default exec_path * if we don't have anything better. */ - if (slash) { + do + --slash; + while (cmd <= slash && !is_dir_sep(*slash)); + if (cmd <= slash) { *slash++ = 0; cmd_path = cmd; cmd = slash; -- cgit v1.2.3 From 3e4a1ba07b65506c36f7f40fa76fcee26c400a5c Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Thu, 15 Nov 2007 22:22:47 +0100 Subject: Windows: Implement a wrapper of the open() function. The wrapper does two things: - Requests to open /dev/null are redirected to open the nul pseudo file. - A request to open a file that currently exists as a directory on Windows fails with EACCES; this is changed to EISDIR. Signed-off-by: Johannes Sixt --- compat/mingw.c | 20 ++++++++++++++++++++ compat/mingw.h | 3 +++ 2 files changed, 23 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 4e559bdc93..f869999a5d 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2,6 +2,26 @@ unsigned int _CRT_fmode = _O_BINARY; +#undef open +int mingw_open (const char *filename, int oflags, ...) +{ + va_list args; + unsigned mode; + va_start(args, oflags); + mode = va_arg(args, int); + va_end(args); + + if (!strcmp(filename, "/dev/null")) + filename = "nul"; + int fd = open(filename, oflags, mode); + if (fd < 0 && (oflags & O_CREAT) && errno == EACCES) { + DWORD attrs = GetFileAttributes(filename); + if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)) + errno = EISDIR; + } + return fd; +} + unsigned int sleep (unsigned int seconds) { Sleep(seconds*1000); diff --git a/compat/mingw.h b/compat/mingw.h index a954014ff2..901cfa7c82 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -137,6 +137,9 @@ int sigaction(int sig, struct sigaction *in, struct sigaction *out); * replacements of existing functions */ +int mingw_open (const char *filename, int oflags, ...); +#define open mingw_open + char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd -- cgit v1.2.3 From f7597acac069bbaad8887cb315a4e5420222971a Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Dec 2007 22:09:17 +0100 Subject: Windows: A minimal implemention of getpwuid(). getpwuid() is implemented just enough that GIT does not issue errors. Since the information that it returns is not very useful, users are required to set up user.name and user.email configuration. All uses of getpwuid() are like getpwuid(getuid()), hence, the return value of getuid() is irrelevant and the uid parameter is not even looked at. Side note: getpwnam() is only used to resolve '~' and '~username' paths, which is an idiom not known on Windows, hence, we don't implement it. Signed-off-by: Johannes Sixt --- compat/mingw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index f869999a5d..0e1ddbe4df 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -75,7 +75,15 @@ char *mingw_getcwd(char *pointer, int len) struct passwd *getpwuid(int uid) { + static char user_name[100]; static struct passwd p; + + DWORD len = sizeof(user_name); + if (!GetUserName(user_name, &len)) + return NULL; + p.pw_name = user_name; + p.pw_gecos = "unknown"; + p.pw_dir = NULL; return &p; } -- cgit v1.2.3 From 132a6e903fd912d02392db55c407267103766f19 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Jan 2007 13:39:09 +0100 Subject: Windows: always chmod(, 0666) before unlink(). On Windows, read-only files cannot be deleted. To make sure that deletion does not fail because of this, always call chmod() before unlink(). Signed-off-by: Johannes Schindelin Signed-off-by: Johannes Sixt --- compat/mingw.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compat/mingw.h b/compat/mingw.h index 901cfa7c82..95a08b4128 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -106,6 +106,14 @@ static inline int mingw_mkdir(const char *path, int mode) } #define mkdir mingw_mkdir +static inline int mingw_unlink(const char *pathname) +{ + /* read-only files cannot be removed */ + chmod(pathname, 0666); + return unlink(pathname); +} +#define unlink mingw_unlink + static inline int waitpid(pid_t pid, unsigned *status, unsigned options) { if (options == 0) -- cgit v1.2.3 From ea9e98c3a54d3b693367fbb8c87812b17ad95869 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 7 Dec 2007 22:19:40 +0100 Subject: Windows: Work around misbehaved rename(). Windows's rename() is based on the MoveFile() API, which fails if the destination exists. Here we work around the problem by using MoveFileEx(). Furthermore, the posixly correct error is returned if the destination is a directory. The implementation is still slightly incomplete, however, because of the missing error code translation: We assume that the failure is due to permissions. Signed-off-by: Johannes Sixt --- compat/mingw.c | 25 +++++++++++++++++++++++++ compat/mingw.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 0e1ddbe4df..c6a5c1b218 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -73,6 +73,31 @@ char *mingw_getcwd(char *pointer, int len) return ret; } +#undef rename +int mingw_rename(const char *pold, const char *pnew) +{ + /* + * Try native rename() first to get errno right. + * It is based on MoveFile(), which cannot overwrite existing files. + */ + if (!rename(pold, pnew)) + return 0; + if (errno != EEXIST) + return -1; + if (MoveFileEx(pold, pnew, MOVEFILE_REPLACE_EXISTING)) + return 0; + /* TODO: translate more errors */ + if (GetLastError() == ERROR_ACCESS_DENIED) { + DWORD attrs = GetFileAttributes(pnew); + if (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)) { + errno = EISDIR; + return -1; + } + } + errno = EACCES; + return -1; +} + struct passwd *getpwuid(int uid) { static char user_name[100]; diff --git a/compat/mingw.h b/compat/mingw.h index 95a08b4128..46fd8da06e 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -151,6 +151,9 @@ int mingw_open (const char *filename, int oflags, ...); char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd +int mingw_rename(const char*, const char*); +#define rename mingw_rename + /* * git specific compatibility */ -- cgit v1.2.3 From bb5799d6ef3ad7ee70a138b82e67b93e3aaa8017 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 23 Jun 2008 08:31:41 +0200 Subject: Make my_mktime() public and rename it to tm_to_time_t() We will use it from the MinGW port's gettimeofday() substitution. Signed-off-by: Johannes Sixt --- date.c | 13 ++++++++----- git-compat-util.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/date.c b/date.c index 1a4eb87b01..35a52576c5 100644 --- a/date.c +++ b/date.c @@ -6,7 +6,10 @@ #include "cache.h" -static time_t my_mktime(struct tm *tm) +/* + * This is like mktime, but without normalization of tm_wday and tm_yday. + */ +time_t tm_to_time_t(const struct tm *tm) { static const int mdays[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 @@ -67,7 +70,7 @@ static int local_tzoffset(unsigned long time) t = time; localtime_r(&t, &tm); - t_local = my_mktime(&tm); + t_local = tm_to_time_t(&tm); if (t_local < t) { eastwest = -1; @@ -322,7 +325,7 @@ static int is_date(int year, int month, int day, struct tm *now_tm, time_t now, if (!now_tm) return 1; - specified = my_mktime(r); + specified = tm_to_time_t(r); /* Be it commit time or author time, it does not make * sense to specify timestamp way into the future. Make @@ -572,7 +575,7 @@ int parse_date(const char *date, char *result, int maxlen) } /* mktime uses local timezone */ - then = my_mktime(&tm); + then = tm_to_time_t(&tm); if (offset == -1) offset = (then - mktime(&tm)) / 60; @@ -611,7 +614,7 @@ void datestamp(char *buf, int bufsize) time(&now); - offset = my_mktime(localtime(&now)) - now; + offset = tm_to_time_t(localtime(&now)) - now; offset /= 60; date_string(now, offset, buf, bufsize); diff --git a/git-compat-util.h b/git-compat-util.h index a9a85be89d..58cdc087fa 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -147,6 +147,7 @@ extern void set_error_routine(void (*routine)(const char *err, va_list params)); extern void set_warn_routine(void (*routine)(const char *warn, va_list params)); extern int prefixcmp(const char *str, const char *prefix); +extern time_t tm_to_time_t(const struct tm *tm); #ifdef NO_MMAP -- cgit v1.2.3 From a42a0c2e715d6783a6f9b75a8f2c9a48bf499c47 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Dec 2007 21:51:20 +0100 Subject: Windows: Implement gettimeofday(). Signed-off-by: Johannes Sixt --- compat/mingw.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index c6a5c1b218..57af486b7a 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -38,7 +38,20 @@ int mkstemp(char *template) int gettimeofday(struct timeval *tv, void *tz) { - return -1; + SYSTEMTIME st; + struct tm tm; + GetSystemTime(&st); + tm.tm_year = st.wYear-1900; + tm.tm_mon = st.wMonth-1; + tm.tm_mday = st.wDay; + tm.tm_hour = st.wHour; + tm.tm_min = st.wMinute; + tm.tm_sec = st.wSecond; + tv->tv_sec = tm_to_time_t(&tm); + if (tv->tv_sec < 0) + return -1; + tv->tv_usec = st.wMilliseconds*1000; + return 0; } int poll(struct pollfd *ufds, unsigned int nfds, int timeout) -- cgit v1.2.3 From 82f8d969f581c4bd4a782565709466046d962d81 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 23 Mar 2007 10:57:05 +0100 Subject: Windows: Fix PRIuMAX definition. Since GIT calls into Microsoft's MSVCRT.DLL, it must use the printf format that this DLL uses for 64-bit integers, which is %I64u instead of %llu. Signed-off-by: Johannes Sixt --- compat/mingw.h | 1 + 1 file changed, 1 insertion(+) diff --git a/compat/mingw.h b/compat/mingw.h index 46fd8da06e..fad855ef99 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -161,3 +161,4 @@ int mingw_rename(const char*, const char*); #define has_dos_drive_prefix(path) (isalpha(*(path)) && (path)[1] == ':') #define is_dir_sep(c) ((c) == '/' || (c) == '\\') #define PATH_SEP ';' +#define PRIuMAX "I64u" -- cgit v1.2.3 From 6072fc314e87d53bec7266df5b6c3db2c2c82829 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 13 Nov 2007 10:14:45 +0100 Subject: Windows: Implement setitimer() and sigaction(). The timer is implemented using a thread that calls the signal handler at regular intervals. We also replace Windows's signal() function because we must intercept that SIGALRM is set (which is used when a timer is canceled). Signed-off-by: Johannes Sixt --- compat/mingw.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- compat/mingw.h | 3 ++ 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 57af486b7a..3ffff75ba6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -125,12 +125,120 @@ struct passwd *getpwuid(int uid) return &p; } +static HANDLE timer_event; +static HANDLE timer_thread; +static int timer_interval; +static int one_shot; +static sig_handler_t timer_fn = SIG_DFL; + +/* The timer works like this: + * The thread, ticktack(), is a trivial routine that most of the time + * only waits to receive the signal to terminate. The main thread tells + * the thread to terminate by setting the timer_event to the signalled + * state. + * But ticktack() interrupts the wait state after the timer's interval + * length to call the signal handler. + */ + +static __stdcall unsigned ticktack(void *dummy) +{ + while (WaitForSingleObject(timer_event, timer_interval) == WAIT_TIMEOUT) { + if (timer_fn == SIG_DFL) + die("Alarm"); + if (timer_fn != SIG_IGN) + timer_fn(SIGALRM); + if (one_shot) + break; + } + return 0; +} + +static int start_timer_thread(void) +{ + timer_event = CreateEvent(NULL, FALSE, FALSE, NULL); + if (timer_event) { + timer_thread = (HANDLE) _beginthreadex(NULL, 0, ticktack, NULL, 0, NULL); + if (!timer_thread ) + return errno = ENOMEM, + error("cannot start timer thread"); + } else + return errno = ENOMEM, + error("cannot allocate resources for timer"); + return 0; +} + +static void stop_timer_thread(void) +{ + if (timer_event) + SetEvent(timer_event); /* tell thread to terminate */ + if (timer_thread) { + int rc = WaitForSingleObject(timer_thread, 1000); + if (rc == WAIT_TIMEOUT) + error("timer thread did not terminate timely"); + else if (rc != WAIT_OBJECT_0) + error("waiting for timer thread failed: %lu", + GetLastError()); + CloseHandle(timer_thread); + } + if (timer_event) + CloseHandle(timer_event); + timer_event = NULL; + timer_thread = NULL; +} + +static inline int is_timeval_eq(const struct timeval *i1, const struct timeval *i2) +{ + return i1->tv_sec == i2->tv_sec && i1->tv_usec == i2->tv_usec; +} + int setitimer(int type, struct itimerval *in, struct itimerval *out) { - return -1; + static const struct timeval zero; + static int atexit_done; + + if (out != NULL) + return errno = EINVAL, + error("setitimer param 3 != NULL not implemented"); + if (!is_timeval_eq(&in->it_interval, &zero) && + !is_timeval_eq(&in->it_interval, &in->it_value)) + return errno = EINVAL, + error("setitimer: it_interval must be zero or eq it_value"); + + if (timer_thread) + stop_timer_thread(); + + if (is_timeval_eq(&in->it_value, &zero) && + is_timeval_eq(&in->it_interval, &zero)) + return 0; + + timer_interval = in->it_value.tv_sec * 1000 + in->it_value.tv_usec / 1000; + one_shot = is_timeval_eq(&in->it_interval, &zero); + if (!atexit_done) { + atexit(stop_timer_thread); + atexit_done = 1; + } + return start_timer_thread(); } int sigaction(int sig, struct sigaction *in, struct sigaction *out) { - return -1; + if (sig != SIGALRM) + return errno = EINVAL, + error("sigaction only implemented for SIGALRM"); + if (out != NULL) + return errno = EINVAL, + error("sigaction: param 3 != NULL not implemented"); + + timer_fn = in->sa_handler; + return 0; +} + +#undef signal +sig_handler_t mingw_signal(int sig, sig_handler_t handler) +{ + if (sig != SIGALRM) + return signal(sig, handler); + sig_handler_t old = timer_fn; + timer_fn = handler; + return old; } diff --git a/compat/mingw.h b/compat/mingw.h index fad855ef99..a369ade9e2 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -154,6 +154,9 @@ char *mingw_getcwd(char *pointer, int len); int mingw_rename(const char*, const char*); #define rename mingw_rename +sig_handler_t mingw_signal(int sig, sig_handler_t handler); +#define signal mingw_signal + /* * git specific compatibility */ -- cgit v1.2.3 From f1a4dfb85a432ae338d162179eaac5d50154fbeb Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 4 Dec 2007 12:38:32 +0100 Subject: Windows: Wrap execve so that shell scripts can be invoked. When an external git command is invoked, it can be a Bourne shell script. This patch looks into the command file to see whether it is one. In this case, the command line is rearranged to invoke the shell with the proper arguments. With this change, scripted git commands work. Command line arguments to those scripts cannot be complex (contain spaces or double-quotes), yet. Signed-off-by: Johannes Sixt --- compat/mingw.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 3 + 2 files changed, 187 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 3ffff75ba6..89b29c55e5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -86,6 +86,190 @@ char *mingw_getcwd(char *pointer, int len) return ret; } +static const char *parse_interpreter(const char *cmd) +{ + static char buf[100]; + char *p, *opt; + int n, fd; + + /* don't even try a .exe */ + n = strlen(cmd); + if (n >= 4 && !strcasecmp(cmd+n-4, ".exe")) + return NULL; + + fd = open(cmd, O_RDONLY); + if (fd < 0) + return NULL; + n = read(fd, buf, sizeof(buf)-1); + close(fd); + if (n < 4) /* at least '#!/x' and not error */ + return NULL; + + if (buf[0] != '#' || buf[1] != '!') + return NULL; + buf[n] = '\0'; + p = strchr(buf, '\n'); + if (!p) + return NULL; + + *p = '\0'; + if (!(p = strrchr(buf+2, '/')) && !(p = strrchr(buf+2, '\\'))) + return NULL; + /* strip options */ + if ((opt = strchr(p+1, ' '))) + *opt = '\0'; + return p+1; +} + +/* + * Splits the PATH into parts. + */ +static char **get_path_split(void) +{ + char *p, **path, *envpath = getenv("PATH"); + int i, n = 0; + + if (!envpath || !*envpath) + return NULL; + + envpath = xstrdup(envpath); + p = envpath; + while (p) { + char *dir = p; + p = strchr(p, ';'); + if (p) *p++ = '\0'; + if (*dir) { /* not earlier, catches series of ; */ + ++n; + } + } + if (!n) + return NULL; + + path = xmalloc((n+1)*sizeof(char*)); + p = envpath; + i = 0; + do { + if (*p) + path[i++] = xstrdup(p); + p = p+strlen(p)+1; + } while (i < n); + path[i] = NULL; + + free(envpath); + + return path; +} + +static void free_path_split(char **path) +{ + if (!path) + return; + + char **p = path; + while (*p) + free(*p++); + free(path); +} + +/* + * exe_only means that we only want to detect .exe files, but not scripts + * (which do not have an extension) + */ +static char *lookup_prog(const char *dir, const char *cmd, int isexe, int exe_only) +{ + char path[MAX_PATH]; + snprintf(path, sizeof(path), "%s/%s.exe", dir, cmd); + + if (!isexe && access(path, F_OK) == 0) + return xstrdup(path); + path[strlen(path)-4] = '\0'; + if ((!exe_only || isexe) && access(path, F_OK) == 0) + return xstrdup(path); + return NULL; +} + +/* + * Determines the absolute path of cmd using the the split path in path. + * If cmd contains a slash or backslash, no lookup is performed. + */ +static char *path_lookup(const char *cmd, char **path, int exe_only) +{ + char *prog = NULL; + int len = strlen(cmd); + int isexe = len >= 4 && !strcasecmp(cmd+len-4, ".exe"); + + if (strchr(cmd, '/') || strchr(cmd, '\\')) + prog = xstrdup(cmd); + + while (!prog && *path) + prog = lookup_prog(*path++, cmd, isexe, exe_only); + + return prog; +} + +static int try_shell_exec(const char *cmd, char *const *argv, char **env) +{ + const char *interpr = parse_interpreter(cmd); + char **path; + char *prog; + int pid = 0; + + if (!interpr) + return 0; + path = get_path_split(); + prog = path_lookup(interpr, path, 1); + if (prog) { + int argc = 0; + const char **argv2; + while (argv[argc]) argc++; + argv2 = xmalloc(sizeof(*argv) * (argc+2)); + argv2[0] = (char *)interpr; + argv2[1] = (char *)cmd; /* full path to the script file */ + memcpy(&argv2[2], &argv[1], sizeof(*argv) * argc); + pid = spawnve(_P_NOWAIT, prog, argv2, (const char **)env); + if (pid >= 0) { + int status; + if (waitpid(pid, &status, 0) < 0) + status = 255; + exit(status); + } + pid = 1; /* indicate that we tried but failed */ + free(prog); + free(argv2); + } + free_path_split(path); + return pid; +} + +static void mingw_execve(const char *cmd, char *const *argv, char *const *env) +{ + /* check if git_command is a shell script */ + if (!try_shell_exec(cmd, argv, (char **)env)) { + int pid, status; + + pid = spawnve(_P_NOWAIT, cmd, (const char **)argv, (const char **)env); + if (pid < 0) + return; + if (waitpid(pid, &status, 0) < 0) + status = 255; + exit(status); + } +} + +void mingw_execvp(const char *cmd, char *const *argv) +{ + char **path = get_path_split(); + char *prog = path_lookup(cmd, path, 0); + + if (prog) { + mingw_execve(prog, argv, environ); + free(prog); + } else + errno = ENOENT; + + free_path_split(path); +} + #undef rename int mingw_rename(const char *pold, const char *pnew) { diff --git a/compat/mingw.h b/compat/mingw.h index a369ade9e2..3ddef11eda 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -154,6 +154,9 @@ char *mingw_getcwd(char *pointer, int len); int mingw_rename(const char*, const char*); #define rename mingw_rename +void mingw_execvp(const char *cmd, char *const *argv); +#define execvp mingw_execvp + sig_handler_t mingw_signal(int sig, sig_handler_t handler); #define signal mingw_signal -- cgit v1.2.3 From 897bb8cb2c2ce6b73038bd8d4106fde079a09cf6 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 7 Dec 2007 22:05:36 +0100 Subject: Windows: A pipe() replacement whose ends are not inherited to children. On Unix the idiom to use a pipe is as follows: pipe(fd); pid = fork(); if (!pid) { dup2(fd[1], 1); close(fd[1]); close(fd[0]); ... } close(fd[1]); i.e. the child process closes the both pipe ends after duplicating one to the file descriptors where they are needed. On Windows, which does not have fork(), we never have an opportunity to (1) duplicate a pipe end in the child, (2) close unused pipe ends. Instead, we must use this idiom: save1 = dup(1); pipe(fd); dup2(fd[1], 1); spawn(...); dup2(save1, 1); close(fd[1]); i.e. save away the descriptor at the destination slot, replace by the pipe end, spawn process, restore the saved file. But there is a problem: Notice that the child did not only inherit the dup2()ed descriptor, but also *both* original pipe ends. Although the one end that was dup()ed could be closed before the spawn(), we cannot close the other end - the child inherits it, no matter what. The solution is to generate non-inheritable pipes. At the first glance, this looks strange: The purpose of pipes is usually to be inherited to child processes. But notice that in the course of actions as outlined above, the pipe descriptor that we want to inherit to the child is dup2()ed, and as it so happens, Windows's dup2() creates inheritable duplicates. Signed-off-by: Johannes Sixt --- compat/mingw.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 5 +---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 89b29c55e5..31a9b9e251 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -54,6 +54,51 @@ int gettimeofday(struct timeval *tv, void *tz) return 0; } +int pipe(int filedes[2]) +{ + int fd; + HANDLE h[2], parent; + + if (_pipe(filedes, 8192, 0) < 0) + return -1; + + parent = GetCurrentProcess(); + + if (!DuplicateHandle (parent, (HANDLE)_get_osfhandle(filedes[0]), + parent, &h[0], 0, FALSE, DUPLICATE_SAME_ACCESS)) { + close(filedes[0]); + close(filedes[1]); + return -1; + } + if (!DuplicateHandle (parent, (HANDLE)_get_osfhandle(filedes[1]), + parent, &h[1], 0, FALSE, DUPLICATE_SAME_ACCESS)) { + close(filedes[0]); + close(filedes[1]); + CloseHandle(h[0]); + return -1; + } + fd = _open_osfhandle((int)h[0], O_NOINHERIT); + if (fd < 0) { + close(filedes[0]); + close(filedes[1]); + CloseHandle(h[0]); + CloseHandle(h[1]); + return -1; + } + close(filedes[0]); + filedes[0] = fd; + fd = _open_osfhandle((int)h[1], O_NOINHERIT); + if (fd < 0) { + close(filedes[0]); + close(filedes[1]); + CloseHandle(h[1]); + return -1; + } + close(filedes[1]); + filedes[1] = fd; + return 0; +} + int poll(struct pollfd *ufds, unsigned int nfds, int timeout) { return -1; diff --git a/compat/mingw.h b/compat/mingw.h index 3ddef11eda..0ce9c96f93 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -122,14 +122,11 @@ static inline int waitpid(pid_t pid, unsigned *status, unsigned options) return -1; } - -static inline int pipe(int filedes[2]) -{ return _pipe(filedes, 8192, 0); } - /* * implementations of missing functions */ +int pipe(int filedes[2]); unsigned int sleep (unsigned int seconds); int mkstemp(char *template); int gettimeofday(struct timeval *tv, void *tz); -- cgit v1.2.3 From ba26f296f9ddc694fc42683132bc328dffd777ec Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 7 Dec 2007 22:08:59 +0100 Subject: Windows: Implement start_command(). On Windows, we have spawnv() variants to run a child process instead of fork()/exec(). In order to attach pipe ends to stdin, stdout, and stderr, we have to use this idiom: save1 = dup(1); dup2(pipe[1], 1); spawnv(); dup2(save1, 1); close(pipe[1]); assuming that the descriptors created by pipe() are not inheritable. Signed-off-by: Johannes Sixt --- compat/mingw.c | 62 +++++++++++++++++++++++++++++++++++++ compat/mingw.h | 8 +++++ run-command.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 153 insertions(+), 14 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 31a9b9e251..7f89a6cb87 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -315,6 +315,68 @@ void mingw_execvp(const char *cmd, char *const *argv) free_path_split(path); } +char **copy_environ() +{ + char **env; + int i = 0; + while (environ[i]) + i++; + env = xmalloc((i+1)*sizeof(*env)); + for (i = 0; environ[i]; i++) + env[i] = xstrdup(environ[i]); + env[i] = NULL; + return env; +} + +void free_environ(char **env) +{ + int i; + for (i = 0; env[i]; i++) + free(env[i]); + free(env); +} + +static int lookup_env(char **env, const char *name, size_t nmln) +{ + int i; + + for (i = 0; env[i]; i++) { + if (0 == strncmp(env[i], name, nmln) + && '=' == env[i][nmln]) + /* matches */ + return i; + } + return -1; +} + +/* + * If name contains '=', then sets the variable, otherwise it unsets it + */ +char **env_setenv(char **env, const char *name) +{ + char *eq = strchrnul(name, '='); + int i = lookup_env(env, name, eq-name); + + if (i < 0) { + if (*eq) { + for (i = 0; env[i]; i++) + ; + env = xrealloc(env, (i+2)*sizeof(*env)); + env[i] = xstrdup(name); + env[i+1] = NULL; + } + } + else { + free(env[i]); + if (*eq) + env[i] = xstrdup(name); + else + for (; env[i]; i++) + env[i] = env[i+1]; + } + return env; +} + #undef rename int mingw_rename(const char *pold, const char *pnew) { diff --git a/compat/mingw.h b/compat/mingw.h index 0ce9c96f93..0879894c68 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -165,3 +165,11 @@ sig_handler_t mingw_signal(int sig, sig_handler_t handler); #define is_dir_sep(c) ((c) == '/' || (c) == '\\') #define PATH_SEP ';' #define PRIuMAX "I64u" + +/* + * helpers + */ + +char **copy_environ(void); +void free_environ(char **env); +char **env_setenv(char **env, const char *name); diff --git a/run-command.c b/run-command.c index 44100a749b..dd8b7751ce 100644 --- a/run-command.c +++ b/run-command.c @@ -65,21 +65,8 @@ int start_command(struct child_process *cmd) cmd->err = fderr[0]; } +#ifndef __MINGW32__ cmd->pid = fork(); - if (cmd->pid < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - if (need_err) - close_pair(fderr); - return -ERR_RUN_COMMAND_FORK; - } - if (!cmd->pid) { if (cmd->no_stdin) dup_devnull(0); @@ -128,6 +115,88 @@ int start_command(struct child_process *cmd) } die("exec %s failed.", cmd->argv[0]); } +#else + int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ + const char *sargv0 = cmd->argv[0]; + char **env = environ; + struct strbuf git_cmd; + + if (cmd->no_stdin) { + s0 = dup(0); + dup_devnull(0); + } else if (need_in) { + s0 = dup(0); + dup2(fdin[0], 0); + } else if (cmd->in) { + s0 = dup(0); + dup2(cmd->in, 0); + } + + if (cmd->no_stderr) { + s2 = dup(2); + dup_devnull(2); + } else if (need_err) { + s2 = dup(2); + dup2(fderr[1], 2); + } + + if (cmd->no_stdout) { + s1 = dup(1); + dup_devnull(1); + } else if (cmd->stdout_to_stderr) { + s1 = dup(1); + dup2(2, 1); + } else if (need_out) { + s1 = dup(1); + dup2(fdout[1], 1); + } else if (cmd->out > 1) { + s1 = dup(1); + dup2(cmd->out, 1); + } + + if (cmd->dir) + die("chdir in start_command() not implemented"); + if (cmd->env) { + env = copy_environ(); + for (; *cmd->env; cmd->env++) + env = env_setenv(env, *cmd->env); + } + + if (cmd->git_cmd) { + strbuf_init(&git_cmd, 0); + strbuf_addf(&git_cmd, "git-%s", cmd->argv[0]); + cmd->argv[0] = git_cmd.buf; + } + + cmd->pid = spawnvpe(_P_NOWAIT, cmd->argv[0], cmd->argv, (const char **)env); + + if (cmd->env) + free_environ(env); + if (cmd->git_cmd) + strbuf_release(&git_cmd); + + cmd->argv[0] = sargv0; + if (s0 >= 0) + dup2(s0, 0), close(s0); + if (s1 >= 0) + dup2(s1, 1), close(s1); + if (s2 >= 0) + dup2(s2, 2), close(s2); +#endif + + if (cmd->pid < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + if (need_err) + close_pair(fderr); + return -ERR_RUN_COMMAND_FORK; + } if (need_in) close(fdin[0]); -- cgit v1.2.3 From 6ed807f8432c558ef102c94cb2e8ae4e03c48d4e Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 1 Dec 2007 22:00:56 +0100 Subject: Windows: A rudimentary poll() emulation. This emulation of poll() is by far not general. It assumes that the fds that are to be waited for are connected to pipes. The pipes are polled in a loop until data becomes available in at least one of them. If only a single fd is waited for, the implementation actually does not wait at all, but assumes that a subsequent read() will block. In order not to needlessly burn CPU time, the CPU is yielded to other processes before the next round in the poll loop using Sleep(0). Note that any sleep timeout greater than zero will reduce the efficiency by a magnitude. Signed-off-by: Johannes Sixt --- compat/mingw.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 7f89a6cb87..2677e78626 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -101,7 +101,62 @@ int pipe(int filedes[2]) int poll(struct pollfd *ufds, unsigned int nfds, int timeout) { - return -1; + int i, pending; + + if (timeout != -1) + return errno = EINVAL, error("poll timeout not supported"); + + /* When there is only one fd to wait for, then we pretend that + * input is available and let the actual wait happen when the + * caller invokes read(). + */ + if (nfds == 1) { + if (!(ufds[0].events & POLLIN)) + return errno = EINVAL, error("POLLIN not set"); + ufds[0].revents = POLLIN; + return 0; + } + +repeat: + pending = 0; + for (i = 0; i < nfds; i++) { + DWORD avail = 0; + HANDLE h = (HANDLE) _get_osfhandle(ufds[i].fd); + if (h == INVALID_HANDLE_VALUE) + return -1; /* errno was set */ + + if (!(ufds[i].events & POLLIN)) + return errno = EINVAL, error("POLLIN not set"); + + /* this emulation works only for pipes */ + if (!PeekNamedPipe(h, NULL, 0, NULL, &avail, NULL)) { + int err = GetLastError(); + if (err == ERROR_BROKEN_PIPE) { + ufds[i].revents = POLLHUP; + pending++; + } else { + errno = EINVAL; + return error("PeekNamedPipe failed," + " GetLastError: %u", err); + } + } else if (avail) { + ufds[i].revents = POLLIN; + pending++; + } else + ufds[i].revents = 0; + } + if (!pending) { + /* The only times that we spin here is when the process + * that is connected through the pipes is waiting for + * its own input data to become available. But since + * the process (pack-objects) is itself CPU intensive, + * it will happily pick up the time slice that we are + * relinguishing here. + */ + Sleep(0); + goto repeat; + } + return 0; } struct tm *gmtime_r(const time_t *timep, struct tm *result) -- cgit v1.2.3 From be501813d25bedc1a441940f349fb91bd9fa4ef6 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 30 Nov 2007 22:51:10 +0100 Subject: Windows: Disambiguate DOS style paths from SSH URLs. If on Windows a path is specified as C:/path, then this is also a valid SSH URL. To disambiguate between the two interpretations we take an URL that looks like a path with a drive letter as a local URL. Signed-off-by: Johannes Sixt --- connect.c | 2 +- transport.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/connect.c b/connect.c index e92af29735..574f42fa47 100644 --- a/connect.c +++ b/connect.c @@ -529,7 +529,7 @@ struct child_process *git_connect(int fd[2], const char *url_orig, end = host; path = strchr(end, c); - if (path) { + if (path && !has_dos_drive_prefix(end)) { if (c == ':') { protocol = PROTO_SSH; *path++ = '\0'; diff --git a/transport.c b/transport.c index 3ff851935f..4145eed979 100644 --- a/transport.c +++ b/transport.c @@ -709,7 +709,8 @@ static int is_local(const char *url) { const char *colon = strchr(url, ':'); const char *slash = strchr(url, '/'); - return !colon || (slash && slash < colon); + return !colon || (slash && slash < colon) || + has_dos_drive_prefix(url); } static int is_file(const char *url) -- cgit v1.2.3 From 618ebe9ff997d27714487c4a4232720be240badc Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 8 Dec 2007 22:19:14 +0100 Subject: Windows: Implement asynchronous functions as threads. In upload-pack we must explicitly close the output channel of rev-list. (On Unix, the channel is closed automatically because process that runs rev-list terminates.) Signed-off-by: Johannes Sixt --- run-command.c | 29 ++++++++++++++++++++++++++++- run-command.h | 5 +++++ upload-pack.c | 2 ++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/run-command.c b/run-command.c index dd8b7751ce..63238e412a 100644 --- a/run-command.c +++ b/run-command.c @@ -288,13 +288,23 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const return run_command(&cmd); } +#ifdef __MINGW32__ +static __stdcall unsigned run_thread(void *data) +{ + struct async *async = data; + return async->proc(async->fd_for_proc, async->data); +} +#endif + int start_async(struct async *async) { int pipe_out[2]; if (pipe(pipe_out) < 0) return error("cannot create pipe: %s", strerror(errno)); + async->out = pipe_out[0]; +#ifndef __MINGW32__ async->pid = fork(); if (async->pid < 0) { error("fork (async) failed: %s", strerror(errno)); @@ -305,16 +315,33 @@ int start_async(struct async *async) close(pipe_out[0]); exit(!!async->proc(pipe_out[1], async->data)); } - async->out = pipe_out[0]; close(pipe_out[1]); +#else + async->fd_for_proc = pipe_out[1]; + async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); + if (!async->tid) { + error("cannot create thread: %s", strerror(errno)); + close_pair(pipe_out); + return -1; + } +#endif return 0; } int finish_async(struct async *async) { +#ifndef __MINGW32__ int ret = 0; if (wait_or_whine(async->pid)) ret = error("waitpid (async) failed"); +#else + DWORD ret = 0; + if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) + ret = error("waiting for thread failed: %lu", GetLastError()); + else if (!GetExitCodeThread(async->tid, &ret)) + ret = error("cannot get thread exit code: %lu", GetLastError()); + CloseHandle(async->tid); +#endif return ret; } diff --git a/run-command.h b/run-command.h index debe3074b5..5203a9ebb1 100644 --- a/run-command.h +++ b/run-command.h @@ -76,7 +76,12 @@ struct async { int (*proc)(int fd, void *data); void *data; int out; /* caller reads from here and closes it */ +#ifndef __MINGW32__ pid_t pid; +#else + HANDLE tid; + int fd_for_proc; +#endif }; int start_async(struct async *async); diff --git a/upload-pack.c b/upload-pack.c index b46dd365ea..9f82941f8b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -135,6 +135,8 @@ static int do_rev_list(int fd, void *create_full_pack) die("revision walk setup failed"); mark_edges_uninteresting(revs.commits, &revs, show_edge); traverse_commit_list(&revs, show_commit, show_object); + fflush(pack_pipe); + fclose(pack_pipe); return 0; } -- cgit v1.2.3 From 87bddba9924045e39e988d9704714db90abd5619 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 30 Nov 2007 22:06:30 +0100 Subject: Windows: Work around incompatible sort and find. If the PATH lists the Windows system directories before the MSYS directories, Windows's own incompatible sort and find commands would be picked up. We implement these commands as functions and call the real tools by absolute path. Signed-off-by: Johannes Sixt --- git-sh-setup.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/git-sh-setup.sh b/git-sh-setup.sh index a44b1c74a3..9cceb21a82 100755 --- a/git-sh-setup.sh +++ b/git-sh-setup.sh @@ -142,3 +142,16 @@ then } : ${GIT_OBJECT_DIRECTORY="$GIT_DIR/objects"} fi + +# Fix some commands on Windows +case $(uname -s) in +*MINGW*) + # Windows has its own (incompatible) sort and find + sort () { + /usr/bin/sort "$@" + } + find () { + /usr/bin/find "$@" + } + ;; +esac -- cgit v1.2.3 From 746fb8574459798d127e3f9cf782ccb8e31c9e45 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 26 Dec 2007 13:51:18 +0100 Subject: Windows: Implement wrappers for gethostbyname(), socket(), and connect(). gethostbyname() is the first function that calls into the Winsock library, and it is wrapped only to initialize the library. socket() is wrapped for two reasons: - Windows's socket() creates things that are like low-level file handles, and they must be converted into file descriptors first. - And these handles cannot be used with plain ReadFile()/WriteFile() because they are opened for "overlapped IO". We have to use WSASocket() to create non-overlapped IO sockets. connect() must be wrapped because Windows's connect() expects the low-level sockets, not file descriptors, and we must first unwrap the file descriptor before we can pass it on to Windows's connect(). Signed-off-by: Johannes Sixt --- compat/mingw.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 9 +++++++++ 2 files changed, 55 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 2677e78626..d184c582eb 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -432,6 +432,52 @@ char **env_setenv(char **env, const char *name) return env; } +/* this is the first function to call into WS_32; initialize it */ +#undef gethostbyname +struct hostent *mingw_gethostbyname(const char *host) +{ + WSADATA wsa; + + if (WSAStartup(MAKEWORD(2,2), &wsa)) + die("unable to initialize winsock subsystem, error %d", + WSAGetLastError()); + atexit((void(*)(void)) WSACleanup); + return gethostbyname(host); +} + +int mingw_socket(int domain, int type, int protocol) +{ + int sockfd; + SOCKET s = WSASocket(domain, type, protocol, NULL, 0, 0); + if (s == INVALID_SOCKET) { + /* + * WSAGetLastError() values are regular BSD error codes + * biased by WSABASEERR. + * However, strerror() does not know about networking + * specific errors, which are values beginning at 38 or so. + * Therefore, we choose to leave the biased error code + * in errno so that _if_ someone looks up the code somewhere, + * then it is at least the number that are usually listed. + */ + errno = WSAGetLastError(); + return -1; + } + /* convert into a file descriptor */ + if ((sockfd = _open_osfhandle(s, O_RDWR|O_BINARY)) < 0) { + closesocket(s); + return error("unable to make a socket file descriptor: %s", + strerror(errno)); + } + return sockfd; +} + +#undef connect +int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz) +{ + SOCKET s = (SOCKET)_get_osfhandle(sockfd); + return connect(s, sa, sz); +} + #undef rename int mingw_rename(const char *pold, const char *pnew) { diff --git a/compat/mingw.h b/compat/mingw.h index 0879894c68..1017632a24 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -148,6 +148,15 @@ int mingw_open (const char *filename, int oflags, ...); char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd +struct hostent *mingw_gethostbyname(const char *host); +#define gethostbyname mingw_gethostbyname + +int mingw_socket(int domain, int type, int protocol); +#define socket mingw_socket + +int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); +#define connect mingw_connect + int mingw_rename(const char*, const char*); #define rename mingw_rename -- cgit v1.2.3 From 7e5d776854e858ae69a4cde9db8de6675ffb5de6 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 24 Nov 2007 22:49:16 +0100 Subject: Windows: Implement a custom spawnve(). The problem with Windows's own implementation is that it tries to be clever when a console program is invoked from a GUI application: In this case it sometimes automatically allocates a new console window. As a consequence, the IO channels of the spawned program are directed to the console, but the invoking application listens on channels that are now directed to nowhere. In this implementation we use the lowlevel facilities of CreateProcess(), which offers a flag to tell the system not to open a console. As a side effect, only stdin, stdout, and stderr channels will be accessible from C programs that are spawned. Other channels (file handles, pipe handles, etc.) are still inherited by the spawned program, but it doesn't get enough information to access them. Johannes Schindelin integrated path quoting and unified the various *execv* and *spawnv* helpers. Eric Raible suggested to also quote '{'. Signed-off-by: Johannes Sixt --- compat/mingw.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- compat/mingw.h | 1 + run-command.c | 2 +- 3 files changed, 199 insertions(+), 7 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index d184c582eb..1ef2a4caf2 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1,4 +1,5 @@ #include "../git-compat-util.h" +#include "../strbuf.h" unsigned int _CRT_fmode = _O_BINARY; @@ -186,6 +187,65 @@ char *mingw_getcwd(char *pointer, int len) return ret; } +/* + * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx + * (Parsing C++ Command-Line Arguments) + */ +static const char *quote_arg(const char *arg) +{ + /* count chars to quote */ + int len = 0, n = 0; + int force_quotes = 0; + char *q, *d; + const char *p = arg; + if (!*p) force_quotes = 1; + while (*p) { + if (isspace(*p) || *p == '*' || *p == '?' || *p == '{') + force_quotes = 1; + else if (*p == '"') + n++; + else if (*p == '\\') { + int count = 0; + while (*p == '\\') { + count++; + p++; + len++; + } + if (*p == '"') + n += count*2 + 1; + continue; + } + len++; + p++; + } + if (!force_quotes && n == 0) + return arg; + + /* insert \ where necessary */ + d = q = xmalloc(len+n+3); + *d++ = '"'; + while (*arg) { + if (*arg == '"') + *d++ = '\\'; + else if (*arg == '\\') { + int count = 0; + while (*arg == '\\') { + count++; + *d++ = *arg++; + } + if (*arg == '"') { + while (count-- > 0) + *d++ = '\\'; + *d++ = '\\'; + } + } + *d++ = *arg++; + } + *d++ = '"'; + *d++ = 0; + return q; +} + static const char *parse_interpreter(const char *cmd) { static char buf[100]; @@ -307,6 +367,138 @@ static char *path_lookup(const char *cmd, char **path, int exe_only) return prog; } +static int env_compare(const void *a, const void *b) +{ + char *const *ea = a; + char *const *eb = b; + return strcasecmp(*ea, *eb); +} + +static pid_t mingw_spawnve(const char *cmd, const char **argv, char **env, + int prepend_cmd) +{ + STARTUPINFO si; + PROCESS_INFORMATION pi; + struct strbuf envblk, args; + unsigned flags; + BOOL ret; + + /* Determine whether or not we are associated to a console */ + HANDLE cons = CreateFile("CONOUT$", GENERIC_WRITE, + FILE_SHARE_WRITE, NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + if (cons == INVALID_HANDLE_VALUE) { + /* There is no console associated with this process. + * Since the child is a console process, Windows + * would normally create a console window. But + * since we'll be redirecting std streams, we do + * not need the console. + */ + flags = CREATE_NO_WINDOW; + } else { + /* There is already a console. If we specified + * CREATE_NO_WINDOW here, too, Windows would + * disassociate the child from the console. + * Go figure! + */ + flags = 0; + CloseHandle(cons); + } + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + si.hStdInput = (HANDLE) _get_osfhandle(0); + si.hStdOutput = (HANDLE) _get_osfhandle(1); + si.hStdError = (HANDLE) _get_osfhandle(2); + + /* concatenate argv, quoting args as we go */ + strbuf_init(&args, 0); + if (prepend_cmd) { + char *quoted = (char *)quote_arg(cmd); + strbuf_addstr(&args, quoted); + if (quoted != cmd) + free(quoted); + } + for (; *argv; argv++) { + char *quoted = (char *)quote_arg(*argv); + if (*args.buf) + strbuf_addch(&args, ' '); + strbuf_addstr(&args, quoted); + if (quoted != *argv) + free(quoted); + } + + if (env) { + int count = 0; + char **e, **sorted_env; + + for (e = env; *e; e++) + count++; + + /* environment must be sorted */ + sorted_env = xmalloc(sizeof(*sorted_env) * (count + 1)); + memcpy(sorted_env, env, sizeof(*sorted_env) * (count + 1)); + qsort(sorted_env, count, sizeof(*sorted_env), env_compare); + + strbuf_init(&envblk, 0); + for (e = sorted_env; *e; e++) { + strbuf_addstr(&envblk, *e); + strbuf_addch(&envblk, '\0'); + } + free(sorted_env); + } + + memset(&pi, 0, sizeof(pi)); + ret = CreateProcess(cmd, args.buf, NULL, NULL, TRUE, flags, + env ? envblk.buf : NULL, NULL, &si, &pi); + + if (env) + strbuf_release(&envblk); + strbuf_release(&args); + + if (!ret) { + errno = ENOENT; + return -1; + } + CloseHandle(pi.hThread); + return (pid_t)pi.hProcess; +} + +pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env) +{ + pid_t pid; + char **path = get_path_split(); + char *prog = path_lookup(cmd, path, 0); + + if (!prog) { + errno = ENOENT; + pid = -1; + } + else { + const char *interpr = parse_interpreter(prog); + + if (interpr) { + const char *argv0 = argv[0]; + char *iprog = path_lookup(interpr, path, 1); + argv[0] = prog; + if (!iprog) { + errno = ENOENT; + pid = -1; + } + else { + pid = mingw_spawnve(iprog, argv, env, 1); + free(iprog); + } + argv[0] = argv0; + } + else + pid = mingw_spawnve(prog, argv, env, 0); + free(prog); + } + free_path_split(path); + return pid; +} + static int try_shell_exec(const char *cmd, char *const *argv, char **env) { const char *interpr = parse_interpreter(cmd); @@ -322,11 +514,10 @@ static int try_shell_exec(const char *cmd, char *const *argv, char **env) int argc = 0; const char **argv2; while (argv[argc]) argc++; - argv2 = xmalloc(sizeof(*argv) * (argc+2)); - argv2[0] = (char *)interpr; - argv2[1] = (char *)cmd; /* full path to the script file */ - memcpy(&argv2[2], &argv[1], sizeof(*argv) * argc); - pid = spawnve(_P_NOWAIT, prog, argv2, (const char **)env); + argv2 = xmalloc(sizeof(*argv) * (argc+1)); + argv2[0] = (char *)cmd; /* full path to the script file */ + memcpy(&argv2[1], &argv[1], sizeof(*argv) * argc); + pid = mingw_spawnve(prog, argv2, env, 1); if (pid >= 0) { int status; if (waitpid(pid, &status, 0) < 0) @@ -347,7 +538,7 @@ static void mingw_execve(const char *cmd, char *const *argv, char *const *env) if (!try_shell_exec(cmd, argv, (char **)env)) { int pid, status; - pid = spawnve(_P_NOWAIT, cmd, (const char **)argv, (const char **)env); + pid = mingw_spawnve(cmd, (const char **)argv, (char **)env, 0); if (pid < 0) return; if (waitpid(pid, &status, 0) < 0) diff --git a/compat/mingw.h b/compat/mingw.h index 1017632a24..48229d538a 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -160,6 +160,7 @@ int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); int mingw_rename(const char*, const char*); #define rename mingw_rename +pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env); void mingw_execvp(const char *cmd, char *const *argv); #define execvp mingw_execvp diff --git a/run-command.c b/run-command.c index 63238e412a..2ce8c2b2f0 100644 --- a/run-command.c +++ b/run-command.c @@ -168,7 +168,7 @@ int start_command(struct child_process *cmd) cmd->argv[0] = git_cmd.buf; } - cmd->pid = spawnvpe(_P_NOWAIT, cmd->argv[0], cmd->argv, (const char **)env); + cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); if (cmd->env) free_environ(env); -- cgit v1.2.3 From 5411bdc4e4b170a57a61b2d486ab344896c41500 Mon Sep 17 00:00:00 2001 From: Marius Storm-Olsen Date: Mon, 3 Sep 2007 20:40:26 +0200 Subject: Windows: Add a new lstat and fstat implementation based on Win32 API. This gives us a significant speedup when adding, committing and stat'ing files. Also, since Windows doesn't really handle symlinks, we let stat just uses lstat. We also need to replace fstat, since our implementation and the standard stat() functions report slightly different timestamps, possibly due to timezones. We simply report UTC in our implementation, and do our FILETIME to time_t conversion based on the document at http://support.microsoft.com/kb/167296. With Moe's repo structure (100K files in 100 dirs, containing 2-4 bytes) mkdir bummer && cd bummer; for ((i=0;i<100;i++)); do mkdir $i && pushd $i; for ((j=0;j<1000;j++)); do echo "$j" >$j; done; popd; done We get the following performance boost: With normal lstat & stat Custom lstat/fstat ------------------------ ------------------------ Command: git init Command: git init ------------------------ ------------------------ real 0m 0.047s real 0m 0.063s user 0m 0.031s user 0m 0.015s sys 0m 0.000s sys 0m 0.015s ------------------------ ------------------------ Command: git add . Command: git add . ------------------------ ------------------------ real 0m19.390s real 0m12.031s 1.6x user 0m 0.015s user 0m 0.031s sys 0m 0.030s sys 0m 0.000s ------------------------ ------------------------ Command: git commit -a.. Command: git commit -a.. ------------------------ ------------------------ real 0m30.812s real 0m16.875s 1.8x user 0m 0.015s user 0m 0.015s sys 0m 0.000s sys 0m 0.015s ------------------------ ------------------------ 3x Command: git-status 3x Command: git-status ------------------------ ------------------------ real 0m11.860s real 0m 5.266s 2.2x user 0m 0.015s user 0m 0.015s sys 0m 0.015s sys 0m 0.015s real 0m11.703s real 0m 5.234s user 0m 0.015s user 0m 0.015s sys 0m 0.000s sys 0m 0.000s real 0m11.672s real 0m 5.250s user 0m 0.031s user 0m 0.015s sys 0m 0.000s sys 0m 0.000s ------------------------ ------------------------ Command: git commit... Command: git commit... (single file) (single file) ------------------------ ------------------------ real 0m14.234s real 0m 7.735s 1.8x user 0m 0.015s user 0m 0.031s sys 0m 0.000s sys 0m 0.000s Signed-off-by: Marius Storm-Olsen Signed-off-by: Johannes Sixt --- compat/mingw.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 10 ++++- 2 files changed, 141 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 1ef2a4caf2..6b742873da 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -23,6 +23,138 @@ int mingw_open (const char *filename, int oflags, ...) return fd; } +static inline time_t filetime_to_time_t(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ + winTime /= 10000000; /* Nano to seconds resolution */ + return (time_t)winTime; +} + +extern int _getdrive( void ); +/* We keep the do_lstat code in a separate function to avoid recursion. + * When a path ends with a slash, the stat will fail with ENOENT. In + * this case, we strip the trailing slashes and stat again. + */ +static int do_lstat(const char *file_name, struct stat *buf) +{ + WIN32_FILE_ATTRIBUTE_DATA fdata; + + if (GetFileAttributesExA(file_name, GetFileExInfoStandard, &fdata)) { + int fMode = S_IREAD; + if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + fMode |= S_IFDIR; + else + fMode |= S_IFREG; + if (!(fdata.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) + fMode |= S_IWRITE; + + buf->st_ino = 0; + buf->st_gid = 0; + buf->st_uid = 0; + buf->st_nlink = 1; + buf->st_mode = fMode; + buf->st_size = fdata.nFileSizeLow; /* Can't use nFileSizeHigh, since it's not a stat64 */ + buf->st_dev = buf->st_rdev = (_getdrive() - 1); + buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); + buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); + buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); + errno = 0; + return 0; + } + + switch (GetLastError()) { + case ERROR_ACCESS_DENIED: + case ERROR_SHARING_VIOLATION: + case ERROR_LOCK_VIOLATION: + case ERROR_SHARING_BUFFER_EXCEEDED: + errno = EACCES; + break; + case ERROR_BUFFER_OVERFLOW: + errno = ENAMETOOLONG; + break; + case ERROR_NOT_ENOUGH_MEMORY: + errno = ENOMEM; + break; + default: + errno = ENOENT; + break; + } + return -1; +} + +/* We provide our own lstat/fstat functions, since the provided + * lstat/fstat functions are so slow. These stat functions are + * tailored for Git's usage (read: fast), and are not meant to be + * complete. Note that Git stat()s are redirected to mingw_lstat() + * too, since Windows doesn't really handle symlinks that well. + */ +int mingw_lstat(const char *file_name, struct stat *buf) +{ + int namelen; + static char alt_name[PATH_MAX]; + + if (!do_lstat(file_name, buf)) + return 0; + + /* if file_name ended in a '/', Windows returned ENOENT; + * try again without trailing slashes + */ + if (errno != ENOENT) + return -1; + + namelen = strlen(file_name); + if (namelen && file_name[namelen-1] != '/') + return -1; + while (namelen && file_name[namelen-1] == '/') + --namelen; + if (!namelen || namelen >= PATH_MAX) + return -1; + + memcpy(alt_name, file_name, namelen); + alt_name[namelen] = 0; + return do_lstat(alt_name, buf); +} + +#undef fstat +int mingw_fstat(int fd, struct stat *buf) +{ + HANDLE fh = (HANDLE)_get_osfhandle(fd); + BY_HANDLE_FILE_INFORMATION fdata; + + if (fh == INVALID_HANDLE_VALUE) { + errno = EBADF; + return -1; + } + /* direct non-file handles to MS's fstat() */ + if (GetFileType(fh) != FILE_TYPE_DISK) + return fstat(fd, buf); + + if (GetFileInformationByHandle(fh, &fdata)) { + int fMode = S_IREAD; + if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + fMode |= S_IFDIR; + else + fMode |= S_IFREG; + if (!(fdata.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) + fMode |= S_IWRITE; + + buf->st_ino = 0; + buf->st_gid = 0; + buf->st_uid = 0; + buf->st_nlink = 1; + buf->st_mode = fMode; + buf->st_size = fdata.nFileSizeLow; /* Can't use nFileSizeHigh, since it's not a stat64 */ + buf->st_dev = buf->st_rdev = (_getdrive() - 1); + buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); + buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); + buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); + return 0; + } + errno = EBADF; + return -1; +} + unsigned int sleep (unsigned int seconds) { Sleep(seconds*1000); diff --git a/compat/mingw.h b/compat/mingw.h index 48229d538a..69b1dde3ca 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -60,7 +60,6 @@ struct itimerval { #define ITIMER_REAL 0 #define st_blocks st_size/512 /* will be cleaned up later */ -#define lstat stat /* * trivial stubs @@ -160,6 +159,15 @@ int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); int mingw_rename(const char*, const char*); #define rename mingw_rename +/* Use mingw_lstat() instead of lstat()/stat() and + * mingw_fstat() instead of fstat() on Windows. + */ +int mingw_lstat(const char *file_name, struct stat *buf); +int mingw_fstat(int fd, struct stat *buf); +#define fstat mingw_fstat +#define lstat mingw_lstat +#define stat(x,y) mingw_lstat(x,y) + pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env); void mingw_execvp(const char *cmd, char *const *argv); #define execvp mingw_execvp -- cgit v1.2.3 From 7c0ffa1cb753f9b909dfb3bc7a5d5417b8de39c2 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 7 Sep 2007 13:05:00 +0200 Subject: Windows: Add a custom implementation for utime(). This is a necessary pendant to our lstat implementation: MSVCRT's implementations of lstat and utime do some adjustments if daylight saving time is in effect, but our lstat implementation doesn't do these adjustments and report the correct UTC time. With this implementation we omit the adjustments in utime() as well and always write UTC. Signed-off-by: Johannes Sixt --- compat/mingw.c | 27 +++++++++++++++++++++++++++ compat/mingw.h | 3 +++ 2 files changed, 30 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 6b742873da..2e47555443 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -155,6 +155,33 @@ int mingw_fstat(int fd, struct stat *buf) return -1; } +static inline void time_t_to_filetime(time_t t, FILETIME *ft) +{ + long long winTime = t * 10000000LL + 116444736000000000LL; + ft->dwLowDateTime = winTime; + ft->dwHighDateTime = winTime >> 32; +} + +int mingw_utime (const char *file_name, const struct utimbuf *times) +{ + FILETIME mft, aft; + int fh, rc; + + /* must have write permission */ + if ((fh = open(file_name, O_RDWR | O_BINARY)) < 0) + return -1; + + time_t_to_filetime(times->modtime, &mft); + time_t_to_filetime(times->actime, &aft); + if (!SetFileTime((HANDLE)_get_osfhandle(fh), NULL, &aft, &mft)) { + errno = EINVAL; + rc = -1; + } else + rc = 0; + close(fh); + return rc; +} + unsigned int sleep (unsigned int seconds) { Sleep(seconds*1000); diff --git a/compat/mingw.h b/compat/mingw.h index 69b1dde3ca..92e9273dd5 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -168,6 +168,9 @@ int mingw_fstat(int fd, struct stat *buf); #define lstat mingw_lstat #define stat(x,y) mingw_lstat(x,y) +int mingw_utime(const char *file_name, const struct utimbuf *times); +#define utime mingw_utime + pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env); void mingw_execvp(const char *cmd, char *const *argv); #define execvp mingw_execvp -- cgit v1.2.3 From fc2ded5b08e071beed974117c0148781b1acc94a Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 12 Nov 2007 12:52:41 +0100 Subject: Windows: Use a customized struct stat that also has the st_blocks member. Windows's struct stat does not have a st_blocks member. Since we already have our own stat/lstat/fstat implementations, we can just as well use a customized struct stat. This patch introduces just that, and also fills in the st_blocks member. On the other hand, we don't provide members that are never used. Signed-off-by: Johannes Sixt --- compat/mingw.c | 36 ++++++++++++++++++++++++++++-------- compat/mingw.h | 18 +++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 2e47555443..28d32969b9 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -31,6 +31,11 @@ static inline time_t filetime_to_time_t(const FILETIME *ft) return (time_t)winTime; } +static inline size_t size_to_blocks(size_t s) +{ + return (s+511)/512; +} + extern int _getdrive( void ); /* We keep the do_lstat code in a separate function to avoid recursion. * When a path ends with a slash, the stat will fail with ENOENT. In @@ -52,10 +57,10 @@ static int do_lstat(const char *file_name, struct stat *buf) buf->st_ino = 0; buf->st_gid = 0; buf->st_uid = 0; - buf->st_nlink = 1; buf->st_mode = fMode; buf->st_size = fdata.nFileSizeLow; /* Can't use nFileSizeHigh, since it's not a stat64 */ - buf->st_dev = buf->st_rdev = (_getdrive() - 1); + buf->st_blocks = size_to_blocks(buf->st_size); + buf->st_dev = _getdrive() - 1; buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); @@ -89,7 +94,7 @@ static int do_lstat(const char *file_name, struct stat *buf) * complete. Note that Git stat()s are redirected to mingw_lstat() * too, since Windows doesn't really handle symlinks that well. */ -int mingw_lstat(const char *file_name, struct stat *buf) +int mingw_lstat(const char *file_name, struct mingw_stat *buf) { int namelen; static char alt_name[PATH_MAX]; @@ -117,7 +122,8 @@ int mingw_lstat(const char *file_name, struct stat *buf) } #undef fstat -int mingw_fstat(int fd, struct stat *buf) +#undef stat +int mingw_fstat(int fd, struct mingw_stat *buf) { HANDLE fh = (HANDLE)_get_osfhandle(fd); BY_HANDLE_FILE_INFORMATION fdata; @@ -127,8 +133,22 @@ int mingw_fstat(int fd, struct stat *buf) return -1; } /* direct non-file handles to MS's fstat() */ - if (GetFileType(fh) != FILE_TYPE_DISK) - return fstat(fd, buf); + if (GetFileType(fh) != FILE_TYPE_DISK) { + struct stat st; + if (fstat(fd, &st)) + return -1; + buf->st_ino = st.st_ino; + buf->st_gid = st.st_gid; + buf->st_uid = st.st_uid; + buf->st_mode = st.st_mode; + buf->st_size = st.st_size; + buf->st_blocks = size_to_blocks(buf->st_size); + buf->st_dev = st.st_dev; + buf->st_atime = st.st_atime; + buf->st_mtime = st.st_mtime; + buf->st_ctime = st.st_ctime; + return 0; + } if (GetFileInformationByHandle(fh, &fdata)) { int fMode = S_IREAD; @@ -142,10 +162,10 @@ int mingw_fstat(int fd, struct stat *buf) buf->st_ino = 0; buf->st_gid = 0; buf->st_uid = 0; - buf->st_nlink = 1; buf->st_mode = fMode; buf->st_size = fdata.nFileSizeLow; /* Can't use nFileSizeHigh, since it's not a stat64 */ - buf->st_dev = buf->st_rdev = (_getdrive() - 1); + buf->st_blocks = size_to_blocks(buf->st_size); + buf->st_dev = _getdrive() - 1; buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); diff --git a/compat/mingw.h b/compat/mingw.h index 92e9273dd5..624b32d1f4 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -59,8 +59,6 @@ struct itimerval { }; #define ITIMER_REAL 0 -#define st_blocks st_size/512 /* will be cleaned up later */ - /* * trivial stubs */ @@ -161,12 +159,22 @@ int mingw_rename(const char*, const char*); /* Use mingw_lstat() instead of lstat()/stat() and * mingw_fstat() instead of fstat() on Windows. + * struct stat is redefined because it lacks the st_blocks member. */ -int mingw_lstat(const char *file_name, struct stat *buf); -int mingw_fstat(int fd, struct stat *buf); +struct mingw_stat { + unsigned st_mode; + time_t st_mtime, st_atime, st_ctime; + unsigned st_dev, st_ino, st_uid, st_gid; + size_t st_size; + size_t st_blocks; +}; +int mingw_lstat(const char *file_name, struct mingw_stat *buf); +int mingw_fstat(int fd, struct mingw_stat *buf); #define fstat mingw_fstat #define lstat mingw_lstat -#define stat(x,y) mingw_lstat(x,y) +#define stat mingw_stat +static inline int mingw_stat(const char *file_name, struct mingw_stat *buf) +{ return mingw_lstat(file_name, buf); } int mingw_utime(const char *file_name, const struct utimbuf *times); #define utime mingw_utime -- cgit v1.2.3 From 4ec22a48c0575c8a303cd00b5ef4b3d703fbf8b3 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 11 Apr 2007 15:26:08 +0200 Subject: Turn builtin_exec_path into a function. builtin_exec_path returns the hard-coded installation path, which is used as the ultimate fallback to look for git commands. Making it into a function enables us in a follow-up patch to return a computed value instead of just a constant string. Signed-off-by: Johannes Sixt --- exec_cmd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/exec_cmd.c b/exec_cmd.c index a1bc4e04bf..6618aad7ab 100644 --- a/exec_cmd.c +++ b/exec_cmd.c @@ -4,9 +4,13 @@ #define MAX_ARGS 32 extern char **environ; -static const char *builtin_exec_path = GIT_EXEC_PATH; static const char *argv_exec_path; +static const char *builtin_exec_path(void) +{ + return GIT_EXEC_PATH; +} + void git_set_argv_exec_path(const char *exec_path) { argv_exec_path = exec_path; @@ -26,7 +30,7 @@ const char *git_exec_path(void) return env; } - return builtin_exec_path; + return builtin_exec_path(); } static void add_path(struct strbuf *out, const char *path) @@ -50,7 +54,7 @@ void setup_path(const char *cmd_path) add_path(&new_path, argv_exec_path); add_path(&new_path, getenv(EXEC_PATH_ENVIRONMENT)); - add_path(&new_path, builtin_exec_path); + add_path(&new_path, builtin_exec_path()); add_path(&new_path, cmd_path); if (old_path) -- cgit v1.2.3 From 6fad004a37432e4378b6cce53eebe8a079104e93 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 11 Apr 2007 16:02:45 +0200 Subject: Windows: Compute the fallback for exec_path from the program invocation. Since on Windows the user is fairly free where to install programs, we cannot rely on a hard-coded path. We use the program name to derive the installation directory and use that as exec_path. Signed-off-by: Johannes Sixt --- exec_cmd.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/exec_cmd.c b/exec_cmd.c index 6618aad7ab..84db7ee664 100644 --- a/exec_cmd.c +++ b/exec_cmd.c @@ -8,7 +8,36 @@ static const char *argv_exec_path; static const char *builtin_exec_path(void) { +#ifndef __MINGW32__ return GIT_EXEC_PATH; +#else + int len; + char *p, *q, *sl; + static char *ep; + if (ep) + return ep; + + len = strlen(_pgmptr); + if (len < 2) + return ep = "."; + + p = ep = xmalloc(len+1); + q = _pgmptr; + sl = NULL; + /* copy program name, turn '\\' into '/', skip last part */ + while ((*p = *q)) { + if (*q == '\\' || *q == '/') { + *p = '/'; + sl = p; + } + p++, q++; + } + if (sl) + *sl = '\0'; + else + ep[0] = '.', ep[1] = '\0'; + return ep; +#endif } void git_set_argv_exec_path(const char *exec_path) -- cgit v1.2.3 From 8512439af25c93942d569d250d49d135d8b7062b Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 30 Nov 2007 22:28:38 +0100 Subject: Windows: Use a relative default template_dir and ETC_GITCONFIG With this definition the templates and system config file will be found irrespective of the installation location. Signed-off-by: Johannes Sixt --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 9c5aae03bb..7233160743 100644 --- a/Makefile +++ b/Makefile @@ -739,6 +739,8 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_OBJS += compat/mingw.o compat/fnmatch.o compat/regex.o EXTLIBS += -lws2_32 X = .exe + template_dir = ../share/git-core/templates/ + ETC_GITCONFIG = ../etc/gitconfig endif ifneq (,$(findstring arm,$(uname_M))) ARM_SHA1 = YesPlease -- cgit v1.2.3 From 0b50b860a505d4a1d6fa595a400f3968333e7128 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 1 Jan 2008 22:15:21 +0100 Subject: When installing, be prepared that template_dir may be relative. Since the Makefile in the template/ subdirectory is only used to install the templates, we do not simply pass down the setting of template_dir when it is relative, but construct the intended destination in a new variable: A relative template_dir is relative to gitexecdir. Signed-off-by: Johannes Sixt --- Makefile | 9 ++++++++- templates/Makefile | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 7233160743..a2a19a85f0 100644 --- a/Makefile +++ b/Makefile @@ -205,7 +205,7 @@ GITWEB_FAVICON = git-favicon.png GITWEB_SITE_HEADER = GITWEB_SITE_FOOTER = -export prefix bindir gitexecdir sharedir template_dir htmldir sysconfdir +export prefix bindir gitexecdir sharedir htmldir sysconfdir CC = gcc AR = ar @@ -1297,6 +1297,13 @@ remove-dashes: ### Installation rules +ifeq ($(firstword $(subst /, ,$(template_dir))),..) +template_instdir = $(gitexecdir)/$(template_dir) +else +template_instdir = $(template_dir) +endif +export template_instdir + install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexecdir_SQ)' diff --git a/templates/Makefile b/templates/Makefile index bda9d13505..9f3f1fc352 100644 --- a/templates/Makefile +++ b/templates/Makefile @@ -8,12 +8,12 @@ INSTALL ?= install TAR ?= tar RM ?= rm -f prefix ?= $(HOME) -template_dir ?= $(prefix)/share/git-core/templates +template_instdir ?= $(prefix)/share/git-core/templates # DESTDIR= # Shell quote (do not use $(call) to accommodate ancient setups); DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) -template_dir_SQ = $(subst ','\'',$(template_dir)) +template_instdir_SQ = $(subst ','\'',$(template_instdir)) all: boilerplates.made custom @@ -46,6 +46,6 @@ clean: $(RM) -r blt boilerplates.made install: all - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(template_dir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(template_instdir_SQ)' (cd blt && $(TAR) cf - .) | \ - (cd '$(DESTDIR_SQ)$(template_dir_SQ)' && umask 022 && $(TAR) xf -) + (cd '$(DESTDIR_SQ)$(template_instdir_SQ)' && umask 022 && $(TAR) xf -) -- cgit v1.2.3 From bfdd9ffd2f7376ccb8b9d4c4e39e2e0fe97d6b37 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sat, 8 Dec 2007 21:28:41 +0100 Subject: Windows: Make the pager work. Since we have neither fork() nor exec(), we have to spawn the pager and feed it with the program's output. Signed-off-by: Johannes Sixt --- pager.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/pager.c b/pager.c index dbd941421b..6b5c9e44b4 100644 --- a/pager.c +++ b/pager.c @@ -1,12 +1,13 @@ #include "cache.h" /* - * This is split up from the rest of git so that we might do - * something different on Windows, for example. + * This is split up from the rest of git so that we can do + * something different on Windows. */ static int spawned_pager; +#ifndef __MINGW32__ static void run_pager(const char *pager) { /* @@ -22,11 +23,31 @@ static void run_pager(const char *pager) execlp(pager, pager, NULL); execl("/bin/sh", "sh", "-c", pager, NULL); } +#else +#include "run-command.h" + +static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; +static struct child_process pager_process = { + .argv = pager_argv, + .in = -1 +}; +static void wait_for_pager(void) +{ + fflush(stdout); + fflush(stderr); + /* signal EOF to pager */ + close(1); + close(2); + finish_command(&pager_process); +} +#endif void setup_pager(void) { +#ifndef __MINGW32__ pid_t pid; int fd[2]; +#endif const char *pager = getenv("GIT_PAGER"); if (!isatty(1)) @@ -45,6 +66,7 @@ void setup_pager(void) spawned_pager = 1; /* means we are emitting to terminal */ +#ifndef __MINGW32__ if (pipe(fd) < 0) return; pid = fork(); @@ -72,6 +94,20 @@ void setup_pager(void) run_pager(pager); die("unable to execute pager '%s'", pager); exit(255); +#else + /* spawn the pager */ + pager_argv[2] = pager; + if (start_command(&pager_process)) + return; + + /* original process continues, but writes to the pipe */ + dup2(pager_process.in, 1); + dup2(pager_process.in, 2); + close(pager_process.in); + + /* this makes sure that the parent terminates after the pager */ + atexit(wait_for_pager); +#endif } int pager_in_use(void) -- cgit v1.2.3 From b2f5e2684da060dd821bf90f88df8b6dc9401a40 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 17 Aug 2007 18:40:36 +0200 Subject: Windows: Work around an oddity when a pipe with no reader is written to. On Windows, write() is implemented using WriteFile(). After the reader closed its end of the pipe, the first WriteFile() returns ERROR_BROKEN_PIPE (which translates to EPIPE), subsequent WriteFile()s return ERROR_NO_DATA, which is translated to EINVAL. Signed-off-by: Johannes Sixt --- write_or_die.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/write_or_die.c b/write_or_die.c index 630be4cb94..e4c8e225fd 100644 --- a/write_or_die.c +++ b/write_or_die.c @@ -34,7 +34,12 @@ void maybe_flush_or_die(FILE *f, const char *desc) return; } if (fflush(f)) { - if (errno == EPIPE) + /* + * On Windows, EPIPE is returned only by the first write() + * after the reading end has closed its handle; subsequent + * write()s return EINVAL. + */ + if (errno == EPIPE || errno == EINVAL) exit(0); die("write failure on %s: %s", desc, strerror(errno)); } -- cgit v1.2.3 From cc3b7a9732f940cb0249a12cb3c02e3d83723eb0 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 14 Jan 2008 14:05:33 +0100 Subject: Windows: Make 'git help -a' work. git help -a scans the PATH for git commands. On Windows it failed for two reasons: - The PATH separator is ';', not ':' on Windows. - stat() does not set the executable bit. We now open the file and guess whether it is executable. The result of the guess is good enough for the list of git commands, but it is of no use for a general stat() implementation because (1) it is a guess, (2) the user has no way to influence the outcome (via chmod or similar), and (3) it would reduce stat() performance by an unacceptable amount. Therefore, this strategy is a special-case local to help.c. Signed-off-by: Johannes Sixt --- help.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/help.c b/help.c index 8aff94c64a..6c16fb4aa1 100644 --- a/help.c +++ b/help.c @@ -391,6 +391,32 @@ static void pretty_print_string_list(struct cmdnames *cmds, int longest) } } +static int is_executable(const char *name) +{ + struct stat st; + + if (stat(name, &st) || /* stat, not lstat */ + !S_ISREG(st.st_mode)) + return 0; + +#ifdef __MINGW32__ + /* cannot trust the executable bit, peek into the file instead */ + char buf[3] = { 0 }; + int n; + int fd = open(name, O_RDONLY); + st.st_mode &= ~S_IXUSR; + if (fd >= 0) { + n = read(fd, buf, 2); + if (n == 2) + /* DOS executables start with "MZ" */ + if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) + st.st_mode |= S_IXUSR; + close(fd); + } +#endif + return st.st_mode & S_IXUSR; +} + static unsigned int list_commands_in_dir(struct cmdnames *cmds, const char *path) { @@ -404,15 +430,12 @@ static unsigned int list_commands_in_dir(struct cmdnames *cmds, return 0; while ((de = readdir(dir)) != NULL) { - struct stat st; int entlen; if (prefixcmp(de->d_name, prefix)) continue; - if (stat(de->d_name, &st) || /* stat, not lstat */ - !S_ISREG(st.st_mode) || - !(st.st_mode & S_IXUSR)) + if (!is_executable(de->d_name)) continue; entlen = strlen(de->d_name) - prefix_len; @@ -447,7 +470,7 @@ static unsigned int load_command_list(void) path = paths = xstrdup(env_path); while (1) { - if ((colon = strchr(path, ':'))) + if ((colon = strchr(path, PATH_SEP))) *colon = 0; len = list_commands_in_dir(&other_cmds, path); -- cgit v1.2.3 From 6fd6aec44fe79dff61fd37a5fec2456c5458b574 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Sun, 22 Jun 2008 11:35:21 +0200 Subject: Windows: TMP and TEMP environment variables specify a temporary directory. Signed-off-by: Johannes Sixt --- compat/mingw.c | 13 +++++++++++++ compat/mingw.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 28d32969b9..3a05fe7da6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -366,6 +366,19 @@ char *mingw_getcwd(char *pointer, int len) return ret; } +#undef getenv +char *mingw_getenv(const char *name) +{ + char *result = getenv(name); + if (!result && !strcmp(name, "TMPDIR")) { + /* on Windows it is TMP and TEMP */ + result = getenv("TMP"); + if (!result) + result = getenv("TEMP"); + } + return result; +} + /* * See http://msdn2.microsoft.com/en-us/library/17w5ykft(vs.71).aspx * (Parsing C++ Command-Line Arguments) diff --git a/compat/mingw.h b/compat/mingw.h index 624b32d1f4..a87cc9679c 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -145,6 +145,9 @@ int mingw_open (const char *filename, int oflags, ...); char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd +char *mingw_getenv(const char *name); +#define getenv mingw_getenv + struct hostent *mingw_gethostbyname(const char *host); #define gethostbyname mingw_gethostbyname -- cgit v1.2.3 From cd800eecc27ef57cb934f349f116cd7022ec71ed Mon Sep 17 00:00:00 2001 From: Steffen Prohaska Date: Sat, 17 Nov 2007 19:16:53 +0100 Subject: Windows: Fix ntohl() related warnings about printf formatting On Windows, ntohl() returns unsigned long. On Unix it returns uint32_t. This makes choosing a suitable printf format string hard. This commit introduces a mingw specific helper function git_ntohl() that casts to unsigned int before returning. This makes gcc's printf format check happy. It should be safe because we expect ntohl to use 32-bit numbers. Signed-off-by: Steffen Prohaska Signed-off-by: Johannes Sixt --- compat/mingw.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/mingw.h b/compat/mingw.h index a87cc9679c..6bc049ad99 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -186,6 +186,10 @@ pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env); void mingw_execvp(const char *cmd, char *const *argv); #define execvp mingw_execvp +static inline unsigned int git_ntohl(unsigned int x) +{ return (unsigned int)ntohl(x); } +#define ntohl git_ntohl + sig_handler_t mingw_signal(int sig, sig_handler_t handler); #define signal mingw_signal -- cgit v1.2.3 From 14086b0a13f5f5ac456cb9ae16a263f92908ae61 Mon Sep 17 00:00:00 2001 From: Steffen Prohaska Date: Sat, 17 Nov 2007 20:48:14 +0100 Subject: compat/pread.c: Add a forward declaration to fix a warning read_in_full()'s is used in compat/pread.c. read_in_full() is declared in cache.h. But we can't include cache.h because too many macros are defined there. Using read_in_full() without including cache.h is dangerous because we wouldn't recognize if its prototyp changed. gcc issues a warning about that. This commit adds a forward declaration to git-compat-util.h. git-compat-util.h is included by compat/pread.c _and_ cache.h. Hence, changes in cache.h would be detected. Signed-off-by: Steffen Prohaska Signed-off-by: Johannes Sixt --- git-compat-util.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 58cdc087fa..51823ae7af 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -185,6 +185,12 @@ extern int git_munmap(void *start, size_t length); #define pread git_pread extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); #endif +/* + * Forward decl that will remind us if its twin in cache.h changes. + * This function is used in compat/pread.c. But we can't include + * cache.h there. + */ +extern ssize_t read_in_full(int fd, void *buf, size_t count); #ifdef NO_SETENV #define setenv gitsetenv -- cgit v1.2.3