[muscle] Re: regex library used in MUSCLE

  • From: Jeremy Friesner <jfriesne@xxxxxxxxx>
  • To: muscle@xxxxxxxxxxxxx
  • Date: Mon, 16 Mar 2015 19:04:10 -0700

Hi Mika,

Thanks for the patch… I’m tempted to just go find the most recent version of 
the regex library and upgrade MUSCLE to use that, so that any other bugs might 
get fixed at the same time… and also so that MUSCLE can continue to use an 
official distribution, which will be more likely than a hand-patched build to 
get some support if I e.g. need to ask for help on a mailing list.  What do you 
see as the potential downside to doing that?

-Jeremy

> On Mar 16, 2015, at 7:02 PM, Mika Lindqvist <linki@xxxxxxx> wrote:
> 
> I noticed there was a security bulletin regarding the regex library used in 
> MUSCLE... I made a patch that also includes some fixes for the 64-bit MSVC 
> environment. I know there is also a much newer version of the same regex 
> library, but I didn't think it would be wise to skip so far forward...
> 
> Index: regcomp.c
> ===================================================================
> --- regcomp.c    (revision 1080)
> +++ regcomp.c    (working copy)
> @@ -94,6 +94,7 @@
>     register struct parse *p = &pa;
>     register int i;
>     register size_t len;
> +    register size_t maxlen;
> #ifdef REDEBUG
> #    define    GOODFLAGS(f)    (f)
> #else
> @@ -116,7 +117,23 @@
>                             (NC-1)*sizeof(cat_t));
>     if (g == NULL)
>         return(REG_ESPACE);
> +    /*
> +     * Limit the pattern space to avoid a 32-bit overflow on buffer
> +     * extension.  Also avoid any signed overflow in case of conversion
> +     * so make the real limit based on a 31-bit overflow.
> +     *
> +     * Likely not applicable on 64-bit systems but handle the case
> +     * generically (who are we to stop people from using ~715MB+
> +     * patterns?).
> +     */
> +    maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
> +    if (len >= maxlen) {
> +        free((char *)g);
> +        return(REG_ESPACE);
> +    }
>     p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;    /* ugh */
> +    assert(p->ssize >= len);
> +
>     p->strip = (sop *)malloc(p->ssize * sizeof(sop));
>     p->slen = 0;
>     if (p->strip == NULL) {
> @@ -795,7 +812,11 @@
> {
>     register char *sp = p->next;
>     register struct cname *cp;
> +#ifdef _M_AMD64
> +    register __int64 len;
> +#else
>     register int len;
> +#endif
> 
>     while (MORE() && !SEETWO(endc, ']'))
>         NEXT();
> Index: regex.h
> ===================================================================
> --- regex.h    (revision 1080)
> +++ regex.h    (working copy)
> @@ -29,7 +29,11 @@
> #endif
> 
> /* === regex2.h === */
> +#ifdef _M_AMD64
> +typedef __int64 regoff_t;
> +#else
> typedef off_t regoff_t;
> +#endif
> typedef struct {
>     int re_magic;
>     size_t re_nsub;        /* number of parenthesized subexpressions */
> Index: regex2.h
> ===================================================================
> --- regex2.h    (revision 1080)
> +++ regex2.h    (working copy)
> @@ -36,8 +36,13 @@
>  * In state representations, an operator's bit is on to signify a state
>  * immediately *preceding* "execution" of that operator.
>  */
> +#ifdef _M_AMD64
> +typedef __int64 sop;        /* strip operator */
> +typedef __int64 sopno;
> +#else
> typedef long sop;        /* strip operator */
> typedef long sopno;
> +#endif
> #define    OPRMASK    0x7c000000
> #define    OPDMASK    0x03ffffff
> #define    OPSHIFT    (26)
> @@ -121,7 +126,11 @@
>     int ncategories;    /* how many character categories */
>     cat_t *categories;    /* ->catspace[-CHAR_MIN] */
>     char *must;        /* match must contain this string */
> +#ifdef _M_AMD64
> +    __int64 mlen;        /* length of must */
> +#else
>     int mlen;        /* length of must */
> +#endif
>     size_t nsub;        /* copy of re_nsub */
>     int backrefs;        /* does it use back references? */
>     sopno nplus;        /* how deep does it nest +s? */
> Index: regexec.c
> ===================================================================
> --- regexec.c    (revision 1080)
> +++ regexec.c    (working copy)
> @@ -80,7 +80,11 @@
>                 (m)->vn = 0; }
> #define    STATETEARDOWN(m)    { free((m)->space); }
> #define    SETUP(v)    ((v) = &m->space[m->vn++ * m->g->nstates])
> +#ifdef _M_AMD64
> +#define    onestate    __int64
> +#else
> #define    onestate    int
> +#endif
> #define    INIT(o, n)    ((o) = (n))
> #define    INC(o)    ((o)++)
> #define    ISSTATEIN(v, o)    ((v)[o])
> 
> 


Other related posts: