/* Logo Search packages:
   Sourcecode: yafc version File versions  Download package

   fnmatch.c */

/* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
            globbing. */

/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.
   
   Bash is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any later
   version.
              
   Bash is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.
                         
   You should have received a copy of the GNU General Public License along
   with Bash; see the file COPYING.  If not, write to the Free Software
   Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */

#include <config.h>

#define EXTENDED_GLOB

#include <stdio.h>      /* for debugging */
                                
#include "fnmatch.h"
#include "collsyms.h"
#include <ctype.h>
#include <stdlib.h>     /* malloc, free */

#if defined (STDC_HEADERS) /* (HAVE_STRING_H)*/
#  include <string.h>
#else
#  include <strings.h>
#endif /* STDC_HEADERS */

static int gmatch ();
static char *brackmatch ();
#ifdef EXTENDED_GLOB
static int extmatch ();
static char *patscan ();
#endif
  
/* Fallback character classifiers, each defined only when no macro of the
   same name is already visible at this point. */

#ifndef isascii
#  define isascii(c)    ((unsigned int)(c) <= 0x7F)
#endif

/* Note that the fallbacks below evaluate C many times, so do not pass an
   argument that has side effects.  */

#if !defined (isblank)
#  define isblank(c)    ((c) == '\t' || (c) == ' ')
#endif

#if !defined (isgraph)
#  define isgraph(c)    (isprint((c)) && (c) != ' ')
#endif

#if !defined (isxdigit)
#  define isxdigit(c)   (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') || ((c) >= '0' && (c) <= '9'))
#endif

/* Case-fold C when FNM_CASEFOLD is set in the `flags' variable that must be
   in scope at the point of use; otherwise pass C through unchanged.  The
   result of FOLD is an `unsigned char'.  The argument is parenthesized after
   each cast so that the cast applies to the whole argument expression, not
   just to its first operand.  C is evaluated more than once. */
# define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)(c)) \
      ? tolower ((unsigned char)(c)) \
      : ((unsigned char)(c)))

#ifndef STREQ
/* String equality tests that compare the first characters inline before
   paying for the library call.  STREQN compares at most N characters.
   Both macros evaluate A and B twice. */
#define STREQ(a, b) (*(a) == *(b) && strcmp ((a), (b)) == 0)
#define STREQN(a, b, n) (*(a) == *(b) && strncmp ((a), (b), (n)) == 0)
#endif

/* We don't use strcoll(3) for range comparisons in bracket expressions,
   even if we have it, since it can have unwanted side effects in locales
   other than POSIX or US.  For instance, in the de locale, [A-Z] matches
   all characters.  So, for ranges we use ASCII collation, and for
   collating symbol equivalence we use strcoll().  The casts to int are
   to handle tests that use unsigned chars. */

#define rangecmp(c1, c2)      ((int)(c1) - (int)(c2))

#if defined (HAVE_STRCOLL)
/* Helper function for collating symbol equivalence.  Compares the low
   eight bits of C1 and C2 as one-character strings with strcoll(); when
   strcoll() reports a tie between two different characters, the difference
   of the character codes breaks it.  Returns 0 only for identical
   characters. */
static int rangecmp2 (c1, c2)
     int c1, c2;
{
  char left[2], right[2];
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  left[0] = c1;
  left[1] = '\0';
  right[0] = c2;
  right[1] = '\0';

  order = strcoll (left, right);
  return ((order != 0) ? order : (c1 - c2));
}
#else /* !HAVE_STRCOLL */
#  define rangecmp2(c1, c2)   ((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */

#if defined (HAVE_STRCOLL)
/* Return nonzero if C1 and C2 are equivalent according to rangecmp2(),
   zero otherwise. */
static int collequiv (c1, c2)
     int c1, c2;
{
  if (rangecmp2 (c1, c2) != 0)
    return (0);
  return (1);
}
#else
#  define collequiv(c1, c2)   ((c1) == (c2))
#endif

/* Translate the collating symbol name S, which is LEN characters long and
   need not be NUL-terminated, into a character code.

   The posix_collsyms table is searched first; the scan stops at the entry
   whose name is null.  If no table entry matches and LEN is 1, the single
   character stands for itself.  Otherwise -1 is returned to mean `unknown
   symbol'.

   The single-character result is returned as an `unsigned char' value so
   that, where plain `char' is signed, the byte 0xFF cannot be mistaken for
   the -1 failure value. */
static int
collsym (s, len)
     char *s;
     int len;
{
  register struct _collsym *csp;

  for (csp = posix_collsyms; csp->name; csp++)
    {
      /* Require an exact match: same first LEN characters and the table
         name ends right there. */
      if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
        return (csp->code);
    }
  if (len == 1)
    return ((unsigned char)s[0]);
  return -1;
}

/* Public entry point.  Match STRING against PATTERN under FLAGS and return
   what gmatch() returns for the two complete strings.  A null PATTERN or
   STRING never matches and yields FNM_NOMATCH. */
int
fnmatch (pattern, string, flags)
     const char *pattern;
     const char *string;
     int flags;
{
  if (pattern == 0 || string == 0)
    return FNM_NOMATCH;

  /* gmatch() works on explicit [start, end) ranges, so hand it the end of
     each string as well. */
  return (gmatch (string, string + strlen (string),
                  pattern, pattern + strlen (pattern),
                  flags));
}

/* Match STRING against the filename pattern PATTERN, returning zero if
   it matches, FNM_NOMATCH if not.

   STRING is the range [STRING, SE) and PATTERN is the range [PATTERN, PE);
   neither range has to end at a NUL.  FLAGS is a mask of the FNM_* bits
   tested below (FNM_PATHNAME, FNM_PERIOD, FNM_NOESCAPE, FNM_CASEFOLD via
   FOLD, FNM_LEADING_DIR and FNM_EXTMATCH).  A null STRING or PATTERN
   never matches.

   The function recurses for `*' and hands ksh-style extended patterns to
   extmatch(), which in turn calls back into gmatch(). */
static int
gmatch (string, se, pattern, pe, flags)
     char *string, *se;
     char *pattern, *pe;
     int flags;
{
  register char *p, *n;       /* pattern, string */
  register char c;            /* current pattern character */
  register char sc;           /* current string character */

  p = pattern;
  n = string;

  if (string == 0 || pattern == 0)
    return FNM_NOMATCH;

  while (p < pe)
    {
      c = *p++;
      c = FOLD (c);

      /* Past the end of the string we pretend to see a NUL. */
      sc = n < se ? *n : '\0';

#ifdef EXTENDED_GLOB
      /* extmatch () will handle recursively calling gmatch, so we can
         just return what extmatch() returns. */
      if ((flags & FNM_EXTMATCH) && *p == '(' &&
          (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
        {
          int lflags;
          /* If we're not matching the start of the string, we're not
             concerned about the special cases for matching `.' */
          lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
          return (extmatch (c, n, se, p, pe, lflags));
        }
#endif

      switch (c)
        {
        case '?':         /* Match single character */
          if (sc == '\0')
            return FNM_NOMATCH;
          else if ((flags & FNM_PATHNAME) && sc == '/')
            /* If we are matching a pathname, `?' can never match a `/'. */
            return FNM_NOMATCH;
          else if ((flags & FNM_PERIOD) && sc == '.' &&
                   (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
            /* `?' cannot match a `.' if it is the first character of the
               string or if it is the first character following a slash and
               we are matching a pathname. */
            return FNM_NOMATCH;
          break;

        case '\\':        /* backslash escape removes special meaning */
          if (p == pe)
            return FNM_NOMATCH;

          if ((flags & FNM_NOESCAPE) == 0)
            {
              c = *p++;
              /* A trailing `\' cannot match. */
              if (p > pe)
                return FNM_NOMATCH;
              c = FOLD (c);
            }
          if (FOLD (sc) != (unsigned char)c)
            return FNM_NOMATCH;
          break;

        case '*':         /* Match zero or more characters */
          if (p == pe)
            return 0;

          if ((flags & FNM_PERIOD) && sc == '.' &&
              (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
            /* `*' cannot match a `.' if it is the first character of the
               string or if it is the first character following a slash and
               we are matching a pathname. */
            return FNM_NOMATCH;

          /* Collapse multiple consecutive, `*' and `?', but make sure that
             one character of the string is consumed for each `?'. */
          for (c = *p++; (c == '?' || c == '*'); c = *p++)
            {
              if ((flags & FNM_PATHNAME) && sc == '/')
                /* A slash does not match a wildcard under FNM_PATHNAME. */
                return FNM_NOMATCH;
              else if (c == '?')
                {
                  if (sc == '\0')
                    return FNM_NOMATCH;
                  /* One character of the string is consumed in matching
                     this ? wildcard, so *??? won't match if there are
                     fewer than three characters. */
                  n++;
                  sc = n < se ? *n : '\0';
                }

#ifdef EXTENDED_GLOB
              /* Handle ******(patlist) */
              if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(')  /*)*/
                {
                  char *newn;
                  /* We need to check whether or not the extended glob
                     pattern matches the remainder of the string.
                     If it does, we match the entire pattern. */
                  for (newn = n; newn < se; ++newn)
                    {
                      if (extmatch (c, newn, se, p, pe, flags) == 0)
                        return (0);
                    }
                  /* We didn't match the extended glob pattern, but
                     that's OK, since we can match 0 or more occurrences.
                     We need to skip the glob pattern and see if we
                     match the rest of the string. */
                  newn = patscan (p + 1, pe, 0);
                  /* patscan() returns null when the pattern list has no
                     closing paren.  extmatch() has then already compared
                     the rest of the pattern literally against every
                     non-empty suffix tried above, so nothing is left that
                     could match; do not dereference the null pointer. */
                  if (newn == 0)
                    return FNM_NOMATCH;
                  p = newn;
                }
#endif
              if (p == pe)
                break;
            }

          /* If we've hit the end of the pattern and the last character of
             the pattern was handled by the loop above, we've succeeded.
             Otherwise, we need to match that last character. */
          if (p == pe && (c == '?' || c == '*'))
            return (0);

          /* General case, use recursion. */
          {
            unsigned char c1;

            /* C1 is the first literal character the rest of the pattern
               must see: the escaped character after a backslash, or C. */
            c1 = (unsigned char)(((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c);
            c1 = FOLD (c1);
            for (--p; n < se; ++n)
              {
                /* Only call fnmatch if the first character indicates a
                   possible match.  We can check the first character if
                   we're not doing an extended glob match. */
                if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1)
                  continue;

                /* If we're doing an extended glob match and the pattern is not
                   one of the extended glob patterns, we can check the first
                   character. */
                if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/
                    strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1)
                  continue;

                /* Otherwise, we just recurse. */
                if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
                  return (0);
              }
            return FNM_NOMATCH;
          }

        case '[':
          {
            if (sc == '\0' || n == se)
              return FNM_NOMATCH;

            /* A character class cannot match a `.' if it is the first
               character of the string or if it is the first character
               following a slash and we are matching a pathname. */
            if ((flags & FNM_PERIOD) && sc == '.' &&
                (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
              return (FNM_NOMATCH);

            /* brackmatch() returns the pattern position after the bracket
               expression, or null when SC does not match it. */
            p = brackmatch (p, sc, flags);
            if (p == 0)
              return FNM_NOMATCH;
          }
          break;

        default:
          if ((unsigned char)c != FOLD (sc))
            return (FNM_NOMATCH);
        }

      ++n;
    }

  if (n == se)
    return (0);

  if ((flags & FNM_LEADING_DIR) && *n == '/')
    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
    return 0;

  return (FNM_NOMATCH);
}

/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
   the value of the symbol, and move P past the collating symbol expression.
   On entry P points at the `.' that follows the opening `['.  The value, as
   computed by collsym(), is returned in *VP, if VP is not null.

   If the string ends before a closing `.]' is found, the symbol is treated
   as invalid: -1 is stored in *VP and the returned pointer addresses the
   terminating NUL, so that callers stop there instead of reading past the
   end of the pattern. */
static char *
parse_collsym (p, vp)
     char *p;
     int *vp;
{
  register int pc;
  int val;

  p++;                        /* move past the `.' */

  /* PC becomes the length of the symbol name. */
  for (pc = 0; p[pc]; pc++)
    if (p[pc] == '.' && p[pc+1] == ']')
      break;

  if (p[pc] == '\0')
    {
      /* Unterminated symbol: there is no `.]' to skip. */
      if (vp)
        *vp = -1;
      return (p + pc);
    }

  val = collsym (p, pc);
  if (vp)
    *vp = val;
  return (p + pc + 2);        /* skip the name and the closing `.]' */
}

/* Match the character TEST against the bracket expression whose body
   starts at P (just past the opening `[').  Returns a pointer to the
   pattern character following the bracket expression if TEST matches it,
   and a null pointer if it does not.  If the expression has no closing
   `]', the `[' is treated as an ordinary character: the original P is
   returned when TEST is `[', and null otherwise.

   The pattern characters are held as `unsigned char' so that they compare
   correctly with TEST, which is also `unsigned char', for character codes
   above 127 regardless of whether plain `char' is signed. */
static char *
brackmatch (p, test, flags)
     char *p;
     unsigned char test;
     int flags;
{
  register unsigned char cstart, cend, c;
  register int not;    /* Nonzero if the sense of the character class is inverted.  */
  int pc, brcnt;
  char *savep;

  test = FOLD (test);

  savep = p;

  /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
     circumflex (`^') in its role in a `nonmatching list'.  A bracket
     expression starting with an unquoted circumflex character produces
     unspecified results.  This implementation treats the two identically. */
  if ((not = (*p == '!' || *p == '^')) != 0)
    ++p;

  c = *p++;
  for (;;)
    {
      /* Initialize cstart and cend in case `-' is the last
         character of the pattern. */
      cstart = cend = c;

      /* POSIX.2 equivalence class:  [=c=].  See POSIX.2 2.8.3.2.  Find
         the end of the equivalence class, move the pattern pointer past
         it, and check for equivalence.  XXX - this handles only
         single-character equivalence classes, which is wrong, or at
         least incomplete.  The p[1] test keeps us from looking beyond
         the end of a pattern that stops right after `[='. */
      if (c == '[' && *p == '=' && p[1] != '\0' && p[2] == '=' && p[3] == ']')
        {
          pc = FOLD (p[1]);
          p += 4;
          if (collequiv (test, pc))
            goto matched;
          else
            {
              c = *p++;
              if (c == '\0')
                return ((test == '[') ? savep : (char *)0);
              else if (c == ']')
                /* The equivalence class was the last element of the
                   bracket expression; the expression ends here. */
                break;
              c = FOLD (c);
              continue;
            }
        }

      /* POSIX.2 character class expression.  See POSIX.2 2.8.3.2. */
      if (c == '[' && *p == ':')
        {
          pc = 0;   /* make sure invalid char classes don't match. */
          if (STREQN (p+1, "alnum:]", 7))
            { pc = isalnum (test); p += 8; }
          else if (STREQN (p+1, "alpha:]", 7))
            { pc = isalpha (test); p += 8; }
          else if (STREQN (p+1, "blank:]", 7))
            { pc = isblank (test); p += 8; }
          else if (STREQN (p+1, "cntrl:]", 7))
            { pc = iscntrl (test); p += 8; }
          else if (STREQN (p+1, "digit:]", 7))
            { pc = isdigit (test); p += 8; }
          else if (STREQN (p+1, "graph:]", 7))
            { pc = isgraph (test); p += 8; }
          else if (STREQN (p+1, "lower:]", 7))
            { pc = islower (test); p += 8; }
          else if (STREQN (p+1, "print:]", 7))
            { pc = isprint (test); p += 8; }
          else if (STREQN (p+1, "punct:]", 7))
            { pc = ispunct (test); p += 8; }
          else if (STREQN (p+1, "space:]", 7))
            { pc = isspace (test); p += 8; }
          else if (STREQN (p+1, "upper:]", 7))
            { pc = isupper (test); p += 8; }
          else if (STREQN (p+1, "xdigit:]", 8))
            { pc = isxdigit (test); p += 9; }
          else if (STREQN (p+1, "ascii:]", 7))
            { pc = isascii (test); p += 8; }
          if (pc)
            goto matched;
          else
            {
              /* continue the loop here, since this expression can't be
                 the first part of a range expression. */
              c = *p++;
              if (c == '\0')
                return ((test == '[') ? savep : (char *)0);
              else if (c == ']')
                break;
              c = FOLD (c);
              continue;
            }
        }

      /* POSIX.2 collating symbols.  See POSIX.2 2.8.3.2.  Find the end of
         the symbol name, make sure it is terminated by `.]', translate
         the name to a character using the external table, and do the
         comparison. */
      if (c == '[' && *p == '.')
        {
          p = parse_collsym (p, &pc);
          /* An invalid collating symbol cannot be the first point of a
             range.  If it is, we set cstart to one greater than `test',
             so any comparisons later will fail. */
          cstart = (pc == -1) ? test + 1 : pc;
        }

      if (!(flags & FNM_NOESCAPE) && c == '\\')
        {
          if (*p == '\0')
            return (char *)0;
          cstart = cend = *p++;
        }

      cstart = cend = FOLD (cstart);

      /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
         is not preceded by a backslash and is not part of a bracket
         expression produces undefined results.'  This implementation
         treats the `[' as just a character to be matched if there is
         not a closing `]'. */
      if (c == '\0')
        return ((test == '[') ? savep : (char *)0);

      c = *p++;
      c = FOLD (c);

      if ((flags & FNM_PATHNAME) && c == '/')
        /* [/] can never match when matching a pathname.  */
        return (char *)0;

      /* This introduces a range, unless the `-' is the last
         character of the class.  Find the end of the range
         and move past it. */
      if (c == '-' && *p != ']')
        {
          cend = *p++;
          if (!(flags & FNM_NOESCAPE) && cend == '\\')
            cend = *p++;
          if (cend == '\0')
            return (char *)0;
          if (cend == '[' && *p == '.')
            {
              p = parse_collsym (p, &pc);
              /* An invalid collating symbol cannot be the second part of a
                 range expression.  If we get one, we set cend to one fewer
                 than the test character to make sure the range test fails. */
              cend = (pc == -1) ? test - 1 : pc;
            }
          cend = FOLD (cend);

          c = *p++;

          /* POSIX.2 2.8.3.2:  ``The ending range point shall collate
             equal to or higher than the starting range point; otherwise
             the expression shall be treated as invalid.''  Note that this
             applies to only the range expression; the rest of the bracket
             expression is still checked for matches. */
          if (rangecmp (cstart, cend) > 0)
            {
              if (c == ']')
                break;
              c = FOLD (c);
              continue;
            }
        }

      if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0)
        goto matched;

      if (c == ']')
        break;
    }
  /* No match. */
  return (!not ? (char *)0 : p);

matched:
  /* Skip the rest of the [...] that already matched.  BRCNT counts the
     closing brackets still to be seen. */
  brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.'));
  while (brcnt > 0)
    {
      /* A `[' without a matching `]' is just another character to match. */
      if (c == '\0')
        return ((test == '[') ? savep : (char *)0);

      c = *p++;
      if (c == '[' && (*p == '=' || *p == ':' || *p == '.'))
        brcnt++;
      else if (c == ']')
        brcnt--;
      else if (!(flags & FNM_NOESCAPE) && c == '\\')
        {
          if (*p == '\0')
            return (char *)0;
          /* XXX 1003.2d11 is unclear if this is right. */
          ++p;
        }
    }
  return (not ? (char *)0 : p);
}

#if defined (EXTENDED_GLOB)
/* ksh-like extended pattern matching:

      [?*+@!](pat-list)

   where pat-list is a list of one or more patterns separated by `|'.  Operation
   is as follows:

      ?(patlist)  match zero or one of the given patterns
      *(patlist)  match zero or more of the given patterns
      +(patlist)  match one or more of the given patterns
      @(patlist)  match exactly one of the given patterns
      !(patlist)  match anything except one of the given patterns
*/

/* Scan a pattern starting at STRING and ending at END, keeping track of
   embedded () and [].  If DELIM is 0, we scan until a matching `)'
   because we're scanning a `patlist'.  If DELIM is `|', an unnested `|'
   also ends the scan.  The return value points to the first character
   after the delimiter that ended the scan.  If END is reached first, END's
   position is returned; if the terminating NUL is reached first, a null
   pointer is returned.  Characters inside [] are never treated as
   delimiters or parentheses. */
static char *
patscan (string, end, delim)
     char *string, *end;
     int delim;
{
  int parens = 0;             /* depth of open `(' seen so far */
  int brackets = 0;           /* depth of open `[' seen so far */
  char *cur;

  for (cur = string; *cur != '\0'; cur++)
    {
      if (cur >= end)
        return (cur);

      if (*cur == '[')
        brackets++;
      else if (*cur == ']')
        {
          if (brackets != 0)
            brackets--;
        }
      else if (brackets != 0)
        continue;             /* everything else is inert inside [] */
      else if (*cur == '(')
        parens++;
      else if (*cur == ')')
        {
          /* An unnested `)' closes the list being scanned. */
          if (parens <= 0)
            return (cur + 1);
          parens--;
        }
      else if (*cur == '|')
        {
          if (parens == 0 && delim == '|')
            return (cur + 1);
        }
    }

  return (char *)0;
}

/* Return 0 if the pattern text [P, PE) equals the string text [S, SE),
   FNM_NOMATCH otherwise.  With HAVE_STRCOLL, texts that differ bytewise
   but compare equal under strcoll() in the current locale also count as
   equal.

   Neither input is modified.  The ranges may lie inside constant strings
   (fnmatch() takes `const char *' arguments), so we must not write
   temporary NUL terminators into them; the strcoll() path works on private
   copies instead.  If those copies cannot be allocated, the texts are
   reported as not matching. */
static int
strcompare (p, pe, s, se)
     char *p, *pe, *s, *se;
{
  size_t plen, slen;

  plen = (size_t)(pe - p);
  slen = (size_t)(se - s);

  /* Byte-identical texts are equal under any collation. */
  if (plen == slen && memcmp (p, s, plen) == 0)
    return (0);

#if defined (HAVE_STRCOLL)
  {
    char *pcopy, *scopy;
    int ret;

    ret = 1;
    pcopy = (char *)malloc (plen + 1);
    scopy = (char *)malloc (slen + 1);
    if (pcopy != 0 && scopy != 0)
      {
        memcpy (pcopy, p, plen);
        pcopy[plen] = '\0';
        memcpy (scopy, s, slen);
        scopy[slen] = '\0';
        ret = strcoll (pcopy, scopy);
      }
    free (pcopy);
    free (scopy);

    if (ret == 0)
      return (0);
  }
#endif

  return (FNM_NOMATCH);
}

/* Match a ksh extended pattern specifier.  Return FNM_NOMATCH on failure or
   0 on success.  XC is the operator character (one of `+', `*', `?', `@',
   `!'); P points at the `(' that follows it, so P - 1 is the operator
   itself.  [S, SE) is the string still to be matched and PE is the end of
   the pattern.  This is handed the entire rest of the pattern and string
   the first time an extended pattern specifier is encountered, so it calls
   gmatch recursively. */
static int
extmatch (xc, s, se, p, pe, flags)
     int xc;            /* select which operation */
     char *s, *se;
     char *p, *pe;
     int flags;
{
  char *rest;                 /* pattern text after the closing paren */
  char *alt;                  /* start of the alternative being tried */
  char *altend;               /* one past the delimiter that ends ALT */
  char *cut;                  /* candidate split point in the string */
  int hit;

  rest = patscan (p + (*p == '('), pe, 0); /* ) */
  if (rest == 0)
    /* We failed to scan a valid pattern list.  In this case, we just
       want to compare the two as strings. */
    return (strcompare (p - 1, pe, s, se));

  if (xc == '*' || xc == '+')
    {
      /* Zero occurrences are enough for `*': try the rest of the pattern
         against the whole string first. */
      if (xc == '*' && gmatch (s, se, rest, pe, flags) == 0)
        return (0);

      /* The hard way.  For each alternative and each prefix S -> CUT of
         the string that the alternative matches, the remainder must match
         either the rest of the pattern or, if the prefix was not empty,
         the whole extended pattern again (a further occurrence). */
      alt = p + 1;
      for (;;)
        {
          altend = patscan (alt, pe, '|');
          for (cut = s; cut <= se; cut++)
            {
              if (gmatch (s, cut, alt, altend - 1, flags) != 0)
                continue;
              if (gmatch (cut, se, rest, pe, flags) == 0)
                return (0);
              if (cut != s && gmatch (cut, se, p - 1, pe, flags) == 0)
                return (0);
            }
          if (altend == rest)
            return (FNM_NOMATCH);
          alt = altend;
        }
    }

  if (xc == '?' || xc == '@')
    {
      /* Zero occurrences are enough for `?'. */
      if (xc == '?' && gmatch (s, se, rest, pe, flags) == 0)
        return (0);

      /* Exactly one alternative must match a prefix of the string, with
         the rest of the pattern matching what is left. */
      alt = p + 1;
      for (;;)
        {
          altend = patscan (alt, pe, '|');
          /* With nothing after the pattern list, only the full string
             can be the prefix. */
          cut = (rest == pe) ? se : s;
          while (cut <= se)
            {
              if (gmatch (s, cut, alt, altend - 1, flags) == 0 &&
                  gmatch (cut, se, rest, pe, flags) == 0)
                return (0);
              cut++;
            }
          if (altend == rest)
            return (FNM_NOMATCH);
          alt = altend;
        }
    }

  if (xc == '!')
    {
      /* Succeed for a prefix that none of the alternatives matches,
         provided the rest of the pattern matches what follows it. */
      for (cut = s; cut <= se; cut++)
        {
          alt = p + 1;
          do
            {
              altend = patscan (alt, pe, '|');
              /* If one of the patterns matches, stop looking. */
              hit = (gmatch (s, cut, alt, altend - 1, flags) == 0);
              alt = altend;
            }
          while (hit == 0 && altend != rest);

          if (hit == 0 && gmatch (cut, se, rest, pe, flags) == 0)
            return (0);
        }
      return (FNM_NOMATCH);
    }

  return (FNM_NOMATCH);
}
#endif /* EXTENDED_GLOB */

#ifdef TEST_FNMATCH
/* Test driver, built only when TEST_FNMATCH is defined.
   Usage: prog STRING PATTERN.  Matches STRING against PATTERN with no
   flags, prints the outcome on standard output and returns 0 for a match
   and 1 for no match.  Returns 2, after a usage message on standard
   error, when fewer than two arguments are given. */
int
main (c, v)
     int c;
     char **v;
{
  char *string, *pat;

  /* Without this check v[1] or v[2] could be null and would be handed
     to printf's %s. */
  if (c < 3)
    {
      fprintf (stderr, "usage: %s string pattern\n",
               (c > 0 && v[0]) ? v[0] : "fnmatch");
      return (2);
    }

  string = v[1];
  pat = v[2];

  if (fnmatch (pat, string, 0) == 0)
    {
      printf ("%s matches %s\n", string, pat);
      return (0);
    }

  printf ("%s does not match %s\n", string, pat);
  return (1);
}
#endif

/* Generated by  Doxygen 1.6.0   Back to index */