Hi Mika, Thanks for the patch… I’m tempted to just go find the most recent version of the regex library and upgrade MUSCLE to use that, so that any other bugs might get fixed at the same time, and also so that MUSCLE can continue to use an official distribution, which will be more likely than a hand-patched build to get some support if, e.g., I need to ask for help on a mailing list. What do you see as the potential downside to doing that? -Jeremy > On Mar 16, 2015, at 7:02 PM, Mika Lindqvist <linki@xxxxxxx> wrote: > > I noticed there was a security bulletin regarding the regex library used in > MUSCLE... I made a patch that also includes some fixes for the 64-bit MSVC > environment. I know there is also a much newer version of the same regex library > but I didn't think it would be wise to skip so far forward... > > Index: regcomp.c > =================================================================== > --- regcomp.c (revision 1080) > +++ regcomp.c (working copy) > @@ -94,6 +94,7 @@ > register struct parse *p = &pa; > register int i; > register size_t len; > + register size_t maxlen; > #ifdef REDEBUG > # define GOODFLAGS(f) (f) > #else > @@ -116,7 +117,23 @@ > (NC-1)*sizeof(cat_t)); > if (g == NULL) > return(REG_ESPACE); > + /* > + * Limit the pattern space to avoid a 32-bit overflow on buffer > + * extension. Also avoid any signed overflow in case of conversion > + * so make the real limit based on a 31-bit overflow. > + * > + * Likely not applicable on 64-bit systems but handle the case > + * generically (who are we to stop people from using ~715MB+ > + * patterns?). 
> + */ > + maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3; > + if (len >= maxlen) { > + free((char *)g); > + return(REG_ESPACE); > + } > p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ > + assert(p->ssize >= len); > + > p->strip = (sop *)malloc(p->ssize * sizeof(sop)); > p->slen = 0; > if (p->strip == NULL) { > @@ -795,7 +812,11 @@ > { > register char *sp = p->next; > register struct cname *cp; > +#ifdef _M_AMD64 > + register __int64 len; > +#else > register int len; > +#endif > > while (MORE() && !SEETWO(endc, ']')) > NEXT(); > Index: regex.h > =================================================================== > --- regex.h (revision 1080) > +++ regex.h (working copy) > @@ -29,7 +29,11 @@ > #endif > > /* === regex2.h === */ > +#ifdef _M_AMD64 > +typedef __int64 regoff_t; > +#else > typedef off_t regoff_t; > +#endif > typedef struct { > int re_magic; > size_t re_nsub; /* number of parenthesized subexpressions */ > Index: regex2.h > =================================================================== > --- regex2.h (revision 1080) > +++ regex2.h (working copy) > @@ -36,8 +36,13 @@ > * In state representations, an operator's bit is on to signify a state > * immediately *preceding* "execution" of that operator. > */ > +#ifdef _M_AMD64 > +typedef __int64 sop; /* strip operator */ > +typedef __int64 sopno; > +#else > typedef long sop; /* strip operator */ > typedef long sopno; > +#endif > #define OPRMASK 0x7c000000 > #define OPDMASK 0x03ffffff > #define OPSHIFT (26) > @@ -121,7 +126,11 @@ > int ncategories; /* how many character categories */ > cat_t *categories; /* ->catspace[-CHAR_MIN] */ > char *must; /* match must contain this string */ > +#ifdef _M_AMD64 > + __int64 mlen; /* length of must */ > +#else > int mlen; /* length of must */ > +#endif > size_t nsub; /* copy of re_nsub */ > int backrefs; /* does it use back references? */ > sopno nplus; /* how deep does it nest +s? 
*/ > Index: regexec.c > =================================================================== > --- regexec.c (revision 1080) > +++ regexec.c (working copy) > @@ -80,7 +80,11 @@ > (m)->vn = 0; } > #define STATETEARDOWN(m) { free((m)->space); } > #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) > +#ifdef _M_AMD64 > +#define onestate __int64 > +#else > #define onestate int > +#endif > #define INIT(o, n) ((o) = (n)) > #define INC(o) ((o)++) > #define ISSTATEIN(v, o) ((v)[o]) > >