/* Copyright (C) 1991-1993, 1996-2007, 2009-2013 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifndef _LIBC
# include <config.h>
#endif

/* Enable GNU extensions in fnmatch.h.  The definition is placed ahead of
   the #include <fnmatch.h> that follows, and is skipped when the build
   has already defined the macro.  */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE    1
#endif

/* When the compiler reports a GCC major version below 3 (an undefined
   __GNUC__ counts as 0 inside #if) and no macro of this name exists yet,
   provide a pass-through: the result is simply VALUE and the HINT
   argument is discarded.  */
#if __GNUC__ < 3 && !defined __builtin_expect
# define __builtin_expect(value, hint) (value)
#endif

#include <fnmatch.h>

#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/* Nonzero only when every wide-character facility tested below is
   reported as available by the HAVE_* configuration macros: <wctype.h>,
   btowc, iswctype, wmemchr, and at least one of wmemcpy / wmempcpy.  */
#define WIDE_CHAR_SUPPORT \
  (HAVE_WCTYPE_H && HAVE_BTOWC && HAVE_ISWCTYPE && HAVE_WMEMCHR \
   && (HAVE_WMEMCPY || HAVE_WMEMPCPY))

/* For platforms which support the ISO C amendment 1 functionality we
   support user defined character classes.  */
#if defined _LIBC || WIDE_CHAR_SUPPORT
# include <wctype.h>
# include <wchar.h>
#endif

/* We need some of the locale data (the collation sequence information)
   but there is no interface to get this information in general.  Therefore
   we support a correct implementation only in glibc.  */
#ifdef _LIBC
/* Headers addressed relative to this file (../locale/...) plus
   <shlib-compat.h>; they are pulled in only for the _LIBC build.  */
# include "../locale/localeinfo.h"
# include "../locale/elem-hash.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>

/* CONCAT simply forwards its two arguments to __CONCAT.  */
# define CONCAT(a,b) __CONCAT(a,b)
/* Calls to mbsrtowcs in this file are spelled __mbsrtowcs after
   preprocessing.  */
# define mbsrtowcs __mbsrtowcs
/* From here until the matching #undef near the end of the file, the
   identifier fnmatch is rewritten to __fnmatch.  That covers the
   declaration on the next line as well as the function definition
   further down.  */
# define fnmatch __fnmatch
extern int fnmatch (const char *pattern, const char *string, int flags);
#endif

/* Fallback used when no earlier header has supplied SIZE_MAX: converting
   -1 to size_t yields the largest value that unsigned type can hold.  */
#if !defined SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.
   Yields nonzero exactly when both bits are present in FLAGS.  The
   argument is parenthesized so that an arbitrary expression (for example
   a | b | c) is masked as a whole instead of being split up by operator
   precedence.  */
#define NO_LEADING_PERIOD(flags) \
  (((flags) & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))

/* Comment out all this code if we are using the GNU C Library, and are not
   actually compiling the library itself, and have not detected a bug
   in the library.  This code is part of the GNU C
   Library, but also included in many other GNU distributions.  Compiling
   and linking in this code is a waste when using the GNU C library
   (especially if it is a shared library).  Rather than having every GNU
   program understand 'configure --with-gnu-libc' and omit the object files,
   it is simpler to just do this in the source for each such file.  */

#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU


# if !defined isblank && !(HAVE_ISBLANK && HAVE_DECL_ISBLANK)
/* Stand-in used when isblank is not already a macro and the configuration
   does not report both the function and its declaration: true for a
   space or a horizontal tab only.  CH appears twice in the expansion, so
   do not pass an expression with side effects.  */
#  define isblank(ch) ((ch) == ' ' || (ch) == '\t')
# endif

/* Nonzero when the NUL-terminated strings S1 and S2 are identical
   according to strcmp.  */
# define STREQ(s1, s2) (strcmp (s1, s2) == 0)

/* Select how character-class names ("alpha", "digit", ...) are
   recognised.  This conditional defines:
     CHAR_CLASS_MAX_LENGTH  - longest class name that has to be buffered;
     IS_CHAR_CLASS (string) - nonzero when STRING names a known class;
     ISWCTYPE (WC, WT)      - class membership test (wide builds only);
     HANDLE_MULTIBYTE       - set when the multibyte code is compiled in.  */
# if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#  ifdef CHARCLASS_NAME_MAX
#   define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#  else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
#   define CHAR_CLASS_MAX_LENGTH 256
#  endif

/* The _LIBC build uses the double-underscore spellings of wctype and
   iswctype; every other build uses the public names.  */
#  ifdef _LIBC
#   define IS_CHAR_CLASS(string) __wctype (string)
#   define ISWCTYPE(WC, WT)     __iswctype (WC, WT)
#  else
#   define IS_CHAR_CLASS(string) wctype (string)
#   define ISWCTYPE(WC, WT)     iswctype (WC, WT)
#  endif

/* _LIBC is tested with 'defined', as everywhere else in this file, so the
   test does not depend on the value the macro expands to.  */
#  if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || defined _LIBC
/* In this case we are implementing the multibyte character handling.  */
#   define HANDLE_MULTIBYTE     1
#  endif

# else
/* No wide-character support: only the twelve fixed class names listed
   below are recognised, compared with STREQ.  */
#  define CHAR_CLASS_MAX_LENGTH  6 /* Namely, 'xdigit'.  */

#  define IS_CHAR_CLASS(string)                                               \
   (STREQ (string, "alpha") || STREQ (string, "upper")                        \
    || STREQ (string, "lower") || STREQ (string, "digit")                     \
    || STREQ (string, "alnum") || STREQ (string, "xdigit")                    \
    || STREQ (string, "space") || STREQ (string, "print")                     \
    || STREQ (string, "punct") || STREQ (string, "graph")                     \
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
# endif

/* Avoid depending on library functions or files
   whose names are inconsistent.  */

/* Global variable.  It has internal linkage and, being a static object
   without an initializer, starts out as zero.  None of the code visible
   in this file reads or writes it; presumably the included
   fnmatch_loop.c does -- TODO confirm.  */
static int posixly_correct;

# if !defined internal_function
/* Inside GNU libc some functions carry this special marker.  Where no
   definition has been supplied, make the marker expand to nothing so
   that it can simply be ignored.  */
#  define internal_function
# endif

/* Parameter set for the single-byte (char) instantiation of the matcher.
   The definitions below are consumed by the #include of "fnmatch_loop.c"
   that follows them.  */

/* Case-fold C with tolower when FNM_CASEFOLD is set in FLAGS.  FLAGS is
   not a parameter: it is taken from the scope in which the macro is
   expanded.  C appears more than once in the expansion, so do not pass an
   expression with side effects.  */
# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
# define CHAR   char
# define UCHAR  unsigned char
# define INT    int
/* Names under which this instantiation's functions are generated.  */
# define FCT    internal_fnmatch
# define EXT    ext_match
# define END    end_pattern
/* Literals need no prefix in the narrow-character version.  */
# define L_(CS) CS
# ifdef _LIBC
#  define BTOWC(C)      __btowc (C)
# else
#  define BTOWC(C)      btowc (C)
# endif
# define STRLEN(S) strlen (S)
# define STRCAT(D, S) strcat (D, S)
# ifdef _LIBC
#  define MEMPCPY(D, S, N) __mempcpy (D, S, N)
# else
#  if HAVE_MEMPCPY
#   define MEMPCPY(D, S, N) mempcpy (D, S, N)
#  else
/* Emulation: copy N bytes with memcpy, then yield the address just past
   the last byte written.  N is expanded twice.  */
#   define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))
#  endif
# endif
# define MEMCHR(S, C, N) memchr (S, C, N)
# include "fnmatch_loop.c"


# if HANDLE_MULTIBYTE
/* Parameter set for the wide-character (wchar_t) instantiation of the
   matcher.  These reuse the macro names of the narrow-character set
   defined earlier in this file; NOTE(review): that relies on the earlier
   definitions having been removed again before this point, presumably by
   #undef lines inside fnmatch_loop.c -- confirm.  */

/* Case-fold C with towlower when FNM_CASEFOLD is set in FLAGS, which is
   taken from the scope in which the macro is expanded.  C appears more
   than once in the expansion, so avoid arguments with side effects.  */
#  define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
#  define CHAR  wchar_t
#  define UCHAR wint_t
#  define INT   wint_t
/* Names under which this instantiation's functions are generated.  */
#  define FCT   internal_fnwmatch
#  define EXT   ext_wmatch
#  define END   end_wpattern
/* Turn a character or string literal into its wide (L-prefixed) form.  */
#  define L_(CS)        L##CS
/* The input is already a wide character, so no conversion is needed.  */
#  define BTOWC(C)      (C)
#  ifdef _LIBC
#   define STRLEN(S) __wcslen (S)
#   define STRCAT(D, S) __wcscat (D, S)
#   define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
#  else
#   define STRLEN(S) wcslen (S)
#   define STRCAT(D, S) wcscat (D, S)
#   if HAVE_WMEMPCPY
#    define MEMPCPY(D, S, N) wmempcpy (D, S, N)
#   else
/* Emulation: copy N wide characters with wmemcpy, then yield the address
   just past the last element written.  N is expanded twice.  */
#    define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N))
#   endif
#  endif
#  define MEMCHR(S, C, N) wmemchr (S, C, N)
#  define WIDE_CHAR_VERSION 1

#  undef IS_CHAR_CLASS
/* Look up the character class whose name is the wide string WCS.

   We have to convert the wide character string into a multibyte string.
   But we know that the character class names consist of alphanumeric
   characters from the portable character set, and since the wide
   character encoding for a member of the portable character set is the
   same code point as its single-byte encoding, we can use a simplified
   method to convert the string to a multibyte character string: each
   accepted wide character is simply narrowed to char.

   Returns what wctype (__wctype in the _LIBC build) reports for the
   converted name.  Returns (wctype_t) 0 without consulting it when WCS is
   empty, contains a character outside the accepted set, or is longer than
   CHAR_CLASS_MAX_LENGTH characters.  */
static wctype_t
is_char_class (const wchar_t *wcs)
{
  char s[CHAR_CLASS_MAX_LENGTH + 1];
  char *cp = s;

  /* A do-while on purpose: the first character is validated even when it
     is the terminator, so an empty name is rejected by the checks below.  */
  do
    {
      /* Test for a printable character from the portable character set.  */
#  ifdef _LIBC
      /* Accept 0x20..0x7e except '$' (0x24), '@' (0x40) and '`' (0x60),
         which is the same set the switch in the other branch spells out.  */
      if (*wcs < 0x20 || *wcs > 0x7e
          || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60)
        return (wctype_t) 0;
#  else
      /* Enumerate the accepted characters as wide literals rather than
         comparing against numeric code points.  */
      switch (*wcs)
        {
        case L' ': case L'!': case L'"': case L'#': case L'%':
        case L'&': case L'\'': case L'(': case L')': case L'*':
        case L'+': case L',': case L'-': case L'.': case L'/':
        case L'0': case L'1': case L'2': case L'3': case L'4':
        case L'5': case L'6': case L'7': case L'8': case L'9':
        case L':': case L';': case L'<': case L'=': case L'>':
        case L'?':
        case L'A': case L'B': case L'C': case L'D': case L'E':
        case L'F': case L'G': case L'H': case L'I': case L'J':
        case L'K': case L'L': case L'M': case L'N': case L'O':
        case L'P': case L'Q': case L'R': case L'S': case L'T':
        case L'U': case L'V': case L'W': case L'X': case L'Y':
        case L'Z':
        case L'[': case L'\\': case L']': case L'^': case L'_':
        case L'a': case L'b': case L'c': case L'd': case L'e':
        case L'f': case L'g': case L'h': case L'i': case L'j':
        case L'k': case L'l': case L'm': case L'n': case L'o':
        case L'p': case L'q': case L'r': case L's': case L't':
        case L'u': case L'v': case L'w': case L'x': case L'y':
        case L'z': case L'{': case L'|': case L'}': case L'~':
          break;
        default:
          return (wctype_t) 0;
        }
#  endif

      /* Avoid overrunning the buffer; the last slot of S is reserved for
         the terminating NUL written after the loop.  */
      if (cp == s + CHAR_CLASS_MAX_LENGTH)
        return (wctype_t) 0;

      *cp++ = (char) *wcs++;
    }
  while (*wcs != L'\0');

  *cp = '\0';

#  ifdef _LIBC
  return __wctype (s);
#  else
  return wctype (s);
#  endif
}
#  define IS_CHAR_CLASS(string) is_char_class (string)

#  include "fnmatch_loop.c"
# endif


/* Match STRING against the shell wildcard PATTERN under control of FLAGS.

   When multibyte support is compiled in and the current locale uses a
   multibyte encoding (MB_CUR_MAX != 1), both arguments are converted to
   wide-character strings and handed to internal_fnwmatch.  If either
   conversion cannot be sized, or multibyte support is absent, the
   byte-oriented internal_fnmatch is used on the original strings instead.

   Returns whatever the selected matcher returns.  Returns -1 with errno
   set to ENOMEM when the combined buffer size would overflow or malloc
   fails.  */
int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
  /* Combined lengths below this many wchar_t elements are taken from the
     stack with alloca; anything larger comes from malloc.  */
#  define ALLOCA_LIMIT 2000
  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
    {
      mbstate_t ps;
      size_t patsize;
      size_t strsize;
      size_t totsize;
      wchar_t *wpattern;
      wchar_t *wstring;
      int res;

      /* Calculate the size needed to convert the strings to
         wide characters.  The + 1 accounts for the terminator; it also
         turns a (size_t) -1 result into 0, which the test below treats
         as "conversion not possible".  */
      memset (&ps, '\0', sizeof (ps));
      patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1;
      if (__builtin_expect (patsize != 0, 1))
        {
          assert (mbsinit (&ps));
          strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1;
          if (__builtin_expect (strsize != 0, 1))
            {
              assert (mbsinit (&ps));
              totsize = patsize + strsize;
              /* Reject both wrap-around of the sum and overflow of the
                 byte count computed from it.  */
              if (__builtin_expect (! (patsize <= totsize
                                       && totsize <= SIZE_MAX / sizeof (wchar_t)),
                                    0))
                {
                  errno = ENOMEM;
                  return -1;
                }

              /* Allocate room for the wide characters.  One buffer holds
                 the pattern followed by the string.  */
              if (__builtin_expect (totsize < ALLOCA_LIMIT, 1))
                wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t));
              else
                {
                  wpattern = malloc (totsize * sizeof (wchar_t));
                  if (__builtin_expect (! wpattern, 0))
                    {
                      errno = ENOMEM;
                      return -1;
                    }
                }
              wstring = wpattern + patsize;

              /* Convert the strings into wide characters.  */
              mbsrtowcs (wpattern, &pattern, patsize, &ps);
              assert (mbsinit (&ps));
              mbsrtowcs (wstring, &string, strsize, &ps);

              /* strsize includes the terminator, so the end pointer
                 passed here addresses the terminating L'\0'.  */
              res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1,
                                       flags & FNM_PERIOD, flags);

              /* Only the malloc'ed buffer needs releasing; the test is the
                 exact negation of the one that selected alloca above.  */
              if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0))
                free (wpattern);
              return res;
            }
        }
      /* Falling out of the nested tests means a size computation failed:
         continue with the byte-oriented matcher below.  */
    }

# endif /* HANDLE_MULTIBYTE */

  return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags);
}

/* Symbol export glue, compiled only in the _LIBC build.  */
# ifdef _LIBC
/* Drop the fnmatch -> __fnmatch rename installed near the top of the
   file, so that the plain identifier fnmatch can be passed to the macros
   below.  */
#  undef fnmatch
/* NOTE(review): versioned_symbol, SHLIB_COMPAT, strong_alias,
   compat_symbol and libc_hidden_ver are not defined in this file.
   Presumably they publish __fnmatch as the GLIBC_2_2_3 version of
   fnmatch and, when compatibility with GLIBC_2_0 is requested, also
   expose it through the alias __fnmatch_old -- confirm against their
   definitions.  */
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
#  if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
#  endif
libc_hidden_ver (__fnmatch, fnmatch)
# endif

#endif  /* _LIBC, or not __GNU_LIBRARY__, or not HAVE_FNMATCH_GNU.  */