NeoMutt  2021-02-05-89-gabe350
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1 
30 #include "config.h"
31 #include <ctype.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "buffer.h"
38 #include "logging.h"
39 #include "mbyte.h"
40 #include "memory.h"
41 #include "message.h"
42 #include "queue.h"
43 #include "regex3.h"
44 #include "string2.h"
45 
53 struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
54 {
55  if (!str || (*str == '\0'))
56  return NULL;
57  struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
58  rx->pattern = mutt_str_dup(str);
59  rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
60  if (REG_COMP(rx->regex, str, flags) != 0)
61  mutt_regex_free(&rx);
62 
63  return rx;
64 }
65 
74 struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
75 {
76  if (!str || (*str == '\0'))
77  return NULL;
78 
79  uint16_t rflags = 0;
80  struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
81 
82  reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
83  reg->pattern = mutt_str_dup(str);
84 
85  /* Should we use smart case matching? */
86  if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
87  rflags |= REG_ICASE;
88 
89  /* Is a prefix of '!' allowed? */
90  if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
91  {
92  reg->pat_not = true;
93  str++;
94  }
95 
96  int rc = REG_COMP(reg->regex, str, rflags);
97  if ((rc != 0) && err)
98  {
99  regerror(rc, reg->regex, err->data, err->dsize);
100  mutt_regex_free(&reg);
101  return NULL;
102  }
103 
104  return reg;
105 }
106 
111 void mutt_regex_free(struct Regex **r)
112 {
113  if (!r || !*r)
114  return;
115 
116  FREE(&(*r)->pattern);
117  if ((*r)->regex)
118  regfree((*r)->regex);
119  FREE(&(*r)->regex);
120  FREE(r);
121 }
122 
132 int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
133  struct Buffer *err)
134 {
135  if (!rl || !str || (*str == '\0'))
136  return 0;
137 
138  struct Regex *rx = mutt_regex_compile(str, flags);
139  if (!rx)
140  {
141  mutt_buffer_printf(err, "Bad regex: %s\n", str);
142  return -1;
143  }
144 
145  /* check to make sure the item is not already on this rl */
146  struct RegexNode *np = NULL;
147  STAILQ_FOREACH(np, rl, entries)
148  {
149  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
150  break; /* already on the rl */
151  }
152 
153  if (np)
154  {
155  mutt_regex_free(&rx);
156  }
157  else
158  {
159  np = mutt_regexlist_new();
160  np->regex = rx;
161  STAILQ_INSERT_TAIL(rl, np, entries);
162  }
163 
164  return 0;
165 }
166 
171 void mutt_regexlist_free(struct RegexList *rl)
172 {
173  if (!rl)
174  return;
175 
176  struct RegexNode *np = NULL, *tmp = NULL;
177  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
178  {
179  STAILQ_REMOVE(rl, np, RegexNode, entries);
180  mutt_regex_free(&np->regex);
181  FREE(&np);
182  }
183  STAILQ_INIT(rl);
184 }
185 
192 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
193 {
194  if (!rl || !str)
195  return false;
196  struct RegexNode *np = NULL;
197  STAILQ_FOREACH(np, rl, entries)
198  {
199  if (mutt_regex_match(np->regex, str))
200  {
201  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
202  return true;
203  }
204  }
205 
206  return false;
207 }
208 
214 {
215  return mutt_mem_calloc(1, sizeof(struct RegexNode));
216 }
217 
227 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
228 {
229  if (!rl || !str)
230  return -1;
231 
232  if (mutt_str_equal("*", str))
233  {
234  mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
235  return 0;
236  }
237 
238  int rc = -1;
239  struct RegexNode *np = NULL, *tmp = NULL;
240  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
241  {
242  if (mutt_istr_equal(str, np->regex->pattern))
243  {
244  STAILQ_REMOVE(rl, np, RegexNode, entries);
245  mutt_regex_free(&np->regex);
246  FREE(&np);
247  rc = 0;
248  }
249  }
250 
251  return rc;
252 }
253 
263 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
264  const char *templ, struct Buffer *err)
265 {
266  if (!rl || !pat || (*pat == '\0') || !templ)
267  return 0;
268 
269  struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
270  if (!rx)
271  {
272  if (err)
273  mutt_buffer_printf(err, _("Bad regex: %s"), pat);
274  return -1;
275  }
276 
277  /* check to make sure the item is not already on this rl */
278  struct Replace *np = NULL;
279  STAILQ_FOREACH(np, rl, entries)
280  {
281  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
282  {
283  /* Already on the rl. Formerly we just skipped this case, but
284  * now we're supporting removals, which means we're supporting
285  * re-adds conceptually. So we probably want this to imply a
286  * removal, then do an add. We can achieve the removal by freeing
287  * the template, and leaving t pointed at the current item. */
288  FREE(&np->templ);
289  break;
290  }
291  }
292 
293  /* If np is set, it's pointing into an extant ReplaceList* that we want to
294  * update. Otherwise we want to make a new one to link at the rl's end. */
295  if (np)
296  {
297  mutt_regex_free(&rx);
298  }
299  else
300  {
301  np = mutt_replacelist_new();
302  np->regex = rx;
303  rx = NULL;
304  STAILQ_INSERT_TAIL(rl, np, entries);
305  }
306 
307  /* Now np is the Replace that we want to modify. It is prepared. */
308  np->templ = mutt_str_dup(templ);
309 
310  /* Find highest match number in template string */
311  np->nmatch = 0;
312  for (const char *p = templ; *p;)
313  {
314  if (*p == '%')
315  {
316  int n = 0;
317  if (mutt_str_atoi(++p, &n) < 0)
318  mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
319  if (n > np->nmatch)
320  np->nmatch = n;
321  while (*p && isdigit((int) *p))
322  p++;
323  }
324  else
325  p++;
326  }
327 
328  if (np->nmatch > np->regex->regex->re_nsub)
329  {
330  if (err)
331  mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
332  mutt_replacelist_remove(rl, pat);
333  return -1;
334  }
335 
336  np->nmatch++; /* match 0 is always the whole expr */
337  return 0;
338 }
339 
353 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
354 {
355  static regmatch_t *pmatch = NULL;
356  static size_t nmatch = 0;
357  static char twinbuf[2][1024];
358  int switcher = 0;
359  char *p = NULL;
360  size_t cpysize, tlen;
361  char *src = NULL, *dst = NULL;
362 
363  if (buf && (buflen != 0))
364  buf[0] = '\0';
365 
366  if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
367  return buf;
368 
369  twinbuf[0][0] = '\0';
370  twinbuf[1][0] = '\0';
371  src = twinbuf[switcher];
372  dst = src;
373 
374  mutt_str_copy(src, str, 1024);
375 
376  struct Replace *np = NULL;
377  STAILQ_FOREACH(np, rl, entries)
378  {
379  /* If this pattern needs more matches, expand pmatch. */
380  if (np->nmatch > nmatch)
381  {
382  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
383  nmatch = np->nmatch;
384  }
385 
386  if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
387  {
388  tlen = 0;
389  switcher ^= 1;
390  dst = twinbuf[switcher];
391 
392  mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
393 
394  /* Copy into other twinbuf with substitutions */
395  if (np->templ)
396  {
397  for (p = np->templ; *p && (tlen < 1023);)
398  {
399  if (*p == '%')
400  {
401  p++;
402  if (*p == 'L')
403  {
404  p++;
405  cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
406  strncpy(&dst[tlen], src, cpysize);
407  tlen += cpysize;
408  }
409  else if (*p == 'R')
410  {
411  p++;
412  cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
413  strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
414  tlen += cpysize;
415  }
416  else
417  {
418  long n = strtoul(p, &p, 10); /* get subst number */
419  while (isdigit((unsigned char) *p)) /* skip subst token */
420  p++;
421  for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
422  {
423  dst[tlen++] = src[i];
424  }
425  }
426  }
427  else
428  dst[tlen++] = *p++;
429  }
430  }
431  dst[tlen] = '\0';
432  mutt_debug(LL_DEBUG5, "subst %s\n", dst);
433  }
434  src = dst;
435  }
436 
437  if (buf)
438  mutt_str_copy(buf, dst, buflen);
439  else
440  buf = mutt_str_dup(dst);
441  return buf;
442 }
443 
448 void mutt_replacelist_free(struct ReplaceList *rl)
449 {
450  if (!rl)
451  return;
452 
453  struct Replace *np = NULL, *tmp = NULL;
454  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
455  {
456  STAILQ_REMOVE(rl, np, Replace, entries);
457  mutt_regex_free(&np->regex);
458  FREE(&np->templ);
459  FREE(&np);
460  }
461 }
462 
476 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
477 {
478  if (!rl || !buf || !str)
479  return false;
480 
481  static regmatch_t *pmatch = NULL;
482  static size_t nmatch = 0;
483  int tlen = 0;
484  char *p = NULL;
485 
486  struct Replace *np = NULL;
487  STAILQ_FOREACH(np, rl, entries)
488  {
489  /* If this pattern needs more matches, expand pmatch. */
490  if (np->nmatch > nmatch)
491  {
492  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
493  nmatch = np->nmatch;
494  }
495 
496  /* Does this pattern match? */
497  if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
498  {
499  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
500  mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
501 
502  /* Copy template into buf, with substitutions. */
503  for (p = np->templ; *p && (tlen < (buflen - 1));)
504  {
505  /* backreference to pattern match substring, eg. %1, %2, etc) */
506  if (*p == '%')
507  {
508  char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
509 
510  p++; /* skip over % char */
511  long n = strtol(p, &e, 10);
512  /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
513  * should not strictly be necessary since add_to_spam_list() finds the largest value, and
514  * the static array above is always large enough based on that value. */
515  if ((e != p) && (n >= 0) && (n <= np->nmatch) && (pmatch[n].rm_so != -1))
516  {
517  /* copy as much of the substring match as will fit in the output buffer, saving space for
518  * the terminating nul char */
519  int idx;
520  for (idx = pmatch[n].rm_so;
521  (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
522  {
523  buf[tlen++] = str[idx];
524  }
525  }
526  p = e; /* skip over the parsed integer */
527  }
528  else
529  {
530  buf[tlen++] = *p++;
531  }
532  }
533  /* tlen should always be less than buflen except when buflen<=0
534  * because the bounds checks in the above code leave room for the
535  * terminal nul char. This should avoid returning an unterminated
536  * string to the caller. When buflen<=0 we make no assumption about
537  * the validity of the buf pointer. */
538  if (tlen < buflen)
539  {
540  buf[tlen] = '\0';
541  mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
542  }
543  return true;
544  }
545  }
546 
547  return false;
548 }
549 
555 {
556  return mutt_mem_calloc(1, sizeof(struct Replace));
557 }
558 
565 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
566 {
567  if (!rl || !pat)
568  return 0;
569 
570  int nremoved = 0;
571  struct Replace *np = NULL, *tmp = NULL;
572  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
573  {
574  if (mutt_str_equal(np->regex->pattern, pat))
575  {
576  STAILQ_REMOVE(rl, np, Replace, entries);
577  mutt_regex_free(&np->regex);
578  FREE(&np->templ);
579  FREE(&np);
580  nremoved++;
581  }
582  }
583 
584  return nremoved;
585 }
586 
595 bool mutt_regex_capture(const struct Regex *regex, const char *str,
596  size_t nmatch, regmatch_t matches[])
597 {
598  if (!regex || !str || !regex->regex)
599  return false;
600 
601  int rc = regexec(regex->regex, str, nmatch, matches, 0);
602  return ((rc == 0) ^ regex->pat_not);
603 }
604 
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex to execute
 * @param str   String to apply the regex to
 * @retval bool Result of mutt_regex_capture() called with no capture array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  /* No sub-matches are requested: zero slots and no array */
  const bool matched = mutt_regex_capture(regex, str, 0, NULL);
  return matched;
}
DT_REGEX_ALLOW_NOT
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:37
STAILQ_INIT
#define STAILQ_INIT(head)
Definition: queue.h:369
Replace::nmatch
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:112
mutt_replacelist_match
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:476
mutt_regexlist_add
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:132
mutt_replacelist_apply
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:353
Regex::pat_not
bool pat_not
do not match
Definition: regex3.h:93
mutt_mem_calloc
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
_
#define _(a)
Definition: message.h:28
Buffer
String manipulation buffer.
Definition: buffer.h:33
string2.h
STAILQ_REMOVE
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:399
mutt_regexlist_new
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:213
mutt_regexlist_remove
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:227
Regex
Cached regular expression.
Definition: regex3.h:89
mutt_str_dup
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:370
FREE
#define FREE(x)
Definition: memory.h:40
mutt_regex_compile
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:53
Buffer::dsize
size_t dsize
Length of data.
Definition: buffer.h:37
mutt_regex_capture
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:595
LL_DEBUG5
@ LL_DEBUG5
Log at debug level 5.
Definition: logging.h:44
mutt_replacelist_add
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:263
mutt_str_atoi
int mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: string.c:252
STAILQ_FOREACH
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:349
mutt_istr_equal
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:883
mutt_str_equal
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:871
mbyte.h
RegexNode
List of regular expressions.
Definition: regex3.h:99
mutt_regex_new
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:74
mutt_mem_realloc
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
mutt_debug
#define mutt_debug(LEVEL,...)
Definition: logging.h:81
queue.h
REG_COMP
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:54
mutt_regexlist_free
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:171
mutt_regex_match
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:611
STAILQ_FOREACH_SAFE
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:359
regex3.h
buffer.h
message.h
mutt_replacelist_remove
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:565
STAILQ_INSERT_TAIL
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:386
mutt_mb_is_lower
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:358
DT_REGEX_MATCH_CASE
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:36
logging.h
mutt_replacelist_new
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:554
Regex::regex
regex_t * regex
compiled expression
Definition: regex3.h:92
RegexNode::regex
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:101
Replace
List of regular expressions.
Definition: regex3.h:109
Regex::pattern
char * pattern
printable version
Definition: regex3.h:91
mutt_replacelist_free
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:448
Buffer::data
char * data
Pointer to data.
Definition: buffer.h:35
mutt_buffer_printf
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
mutt_regexlist_match
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:192
Replace::regex
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:111
memory.h
LL_DEBUG2
@ LL_DEBUG2
Log at debug level 2.
Definition: logging.h:41
Replace::templ
char * templ
Template to match.
Definition: regex3.h:113
mutt_regex_free
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:111
idx
size_t idx
Definition: mailbox.c:234
MIN
#define MIN(a, b)
Definition: memory.h:31
mutt_str_copy
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:716