NeoMutt  2021-10-29-220-g2b1eec
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1 
30 #include "config.h"
31 #include <ctype.h>
32 #include <errno.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include "atoi.h"
39 #include "buffer.h"
40 #include "logging.h"
41 #include "mbyte.h"
42 #include "memory.h"
43 #include "message.h"
44 #include "queue.h"
45 #include "regex3.h"
46 #include "string2.h"
47 
55 struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
56 {
57  if (!str || (*str == '\0'))
58  return NULL;
59  struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
60  rx->pattern = mutt_str_dup(str);
61  rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
62  if (REG_COMP(rx->regex, str, flags) != 0)
63  mutt_regex_free(&rx);
64 
65  return rx;
66 }
67 
76 struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
77 {
78  if (!str || (*str == '\0'))
79  return NULL;
80 
81  uint16_t rflags = 0;
82  struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
83 
84  reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
85  reg->pattern = mutt_str_dup(str);
86 
87  /* Should we use smart case matching? */
88  if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
89  rflags |= REG_ICASE;
90 
91  /* Is a prefix of '!' allowed? */
92  if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
93  {
94  reg->pat_not = true;
95  str++;
96  }
97 
98  int rc = REG_COMP(reg->regex, str, rflags);
99  if ((rc != 0) && err)
100  {
101  regerror(rc, reg->regex, err->data, err->dsize);
102  mutt_regex_free(&reg);
103  return NULL;
104  }
105 
106  return reg;
107 }
108 
113 void mutt_regex_free(struct Regex **r)
114 {
115  if (!r || !*r)
116  return;
117 
118  FREE(&(*r)->pattern);
119  if ((*r)->regex)
120  regfree((*r)->regex);
121  FREE(&(*r)->regex);
122  FREE(r);
123 }
124 
134 int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
135  struct Buffer *err)
136 {
137  if (!rl || !str || (*str == '\0'))
138  return 0;
139 
140  struct Regex *rx = mutt_regex_compile(str, flags);
141  if (!rx)
142  {
143  mutt_buffer_printf(err, "Bad regex: %s\n", str);
144  return -1;
145  }
146 
147  /* check to make sure the item is not already on this rl */
148  struct RegexNode *np = NULL;
149  STAILQ_FOREACH(np, rl, entries)
150  {
151  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
152  break; /* already on the rl */
153  }
154 
155  if (np)
156  {
157  mutt_regex_free(&rx);
158  }
159  else
160  {
161  np = mutt_regexlist_new();
162  np->regex = rx;
163  STAILQ_INSERT_TAIL(rl, np, entries);
164  }
165 
166  return 0;
167 }
168 
173 void mutt_regexlist_free(struct RegexList *rl)
174 {
175  if (!rl)
176  return;
177 
178  struct RegexNode *np = NULL, *tmp = NULL;
179  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
180  {
181  STAILQ_REMOVE(rl, np, RegexNode, entries);
182  mutt_regex_free(&np->regex);
183  FREE(&np);
184  }
185  STAILQ_INIT(rl);
186 }
187 
194 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
195 {
196  if (!rl || !str)
197  return false;
198  struct RegexNode *np = NULL;
199  STAILQ_FOREACH(np, rl, entries)
200  {
201  if (mutt_regex_match(np->regex, str))
202  {
203  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
204  return true;
205  }
206  }
207 
208  return false;
209 }
210 
216 {
217  return mutt_mem_calloc(1, sizeof(struct RegexNode));
218 }
219 
229 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
230 {
231  if (!rl || !str)
232  return -1;
233 
234  if (mutt_str_equal("*", str))
235  {
236  mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
237  return 0;
238  }
239 
240  int rc = -1;
241  struct RegexNode *np = NULL, *tmp = NULL;
242  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
243  {
244  if (mutt_istr_equal(str, np->regex->pattern))
245  {
246  STAILQ_REMOVE(rl, np, RegexNode, entries);
247  mutt_regex_free(&np->regex);
248  FREE(&np);
249  rc = 0;
250  }
251  }
252 
253  return rc;
254 }
255 
265 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
266  const char *templ, struct Buffer *err)
267 {
268  if (!rl || !pat || (*pat == '\0') || !templ)
269  return 0;
270 
271  struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
272  if (!rx)
273  {
274  if (err)
275  mutt_buffer_printf(err, _("Bad regex: %s"), pat);
276  return -1;
277  }
278 
279  /* check to make sure the item is not already on this rl */
280  struct Replace *np = NULL;
281  STAILQ_FOREACH(np, rl, entries)
282  {
283  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
284  {
285  /* Already on the rl. Formerly we just skipped this case, but
286  * now we're supporting removals, which means we're supporting
287  * re-adds conceptually. So we probably want this to imply a
288  * removal, then do an add. We can achieve the removal by freeing
289  * the template, and leaving t pointed at the current item. */
290  FREE(&np->templ);
291  break;
292  }
293  }
294 
295  /* If np is set, it's pointing into an extant ReplaceList* that we want to
296  * update. Otherwise we want to make a new one to link at the rl's end. */
297  if (np)
298  {
299  mutt_regex_free(&rx);
300  }
301  else
302  {
303  np = mutt_replacelist_new();
304  np->regex = rx;
305  rx = NULL;
306  STAILQ_INSERT_TAIL(rl, np, entries);
307  }
308 
309  /* Now np is the Replace that we want to modify. It is prepared. */
310  np->templ = mutt_str_dup(templ);
311 
312  /* Find highest match number in template string */
313  np->nmatch = 0;
314  for (const char *p = templ; *p;)
315  {
316  if (*p == '%')
317  {
318  int n = 0;
319  const char *end = mutt_str_atoi(++p, &n);
320  if (!end)
321  {
322  // this is not an error, we might have matched %R or %L in subjectrx
323  mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
324  }
325  if (n > np->nmatch)
326  {
327  np->nmatch = n;
328  }
329  if (end)
330  {
331  p = end;
332  }
333  else
334  {
335  p++;
336  }
337  }
338  else
339  p++;
340  }
341 
342  if (np->nmatch > np->regex->regex->re_nsub)
343  {
344  if (err)
345  mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
346  mutt_replacelist_remove(rl, pat);
347  return -1;
348  }
349 
350  np->nmatch++; /* match 0 is always the whole expr */
351  return 0;
352 }
353 
367 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
368 {
369  static regmatch_t *pmatch = NULL;
370  static size_t nmatch = 0;
371  static char twinbuf[2][1024];
372  int switcher = 0;
373  char *p = NULL;
374  size_t cpysize, tlen;
375  char *src = NULL, *dst = NULL;
376 
377  if (buf && (buflen != 0))
378  buf[0] = '\0';
379 
380  if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
381  return buf;
382 
383  twinbuf[0][0] = '\0';
384  twinbuf[1][0] = '\0';
385  src = twinbuf[switcher];
386  dst = src;
387 
388  mutt_str_copy(src, str, 1024);
389 
390  struct Replace *np = NULL;
391  STAILQ_FOREACH(np, rl, entries)
392  {
393  /* If this pattern needs more matches, expand pmatch. */
394  if (np->nmatch > nmatch)
395  {
396  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
397  nmatch = np->nmatch;
398  }
399 
400  if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
401  {
402  tlen = 0;
403  switcher ^= 1;
404  dst = twinbuf[switcher];
405 
406  mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
407 
408  /* Copy into other twinbuf with substitutions */
409  if (np->templ)
410  {
411  for (p = np->templ; *p && (tlen < 1023);)
412  {
413  if (*p == '%')
414  {
415  p++;
416  if (*p == 'L')
417  {
418  p++;
419  cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
420  strncpy(&dst[tlen], src, cpysize);
421  tlen += cpysize;
422  }
423  else if (*p == 'R')
424  {
425  p++;
426  cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
427  strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
428  tlen += cpysize;
429  }
430  else
431  {
432  long n = strtoul(p, &p, 10); /* get subst number */
433  if (n < np->nmatch)
434  {
435  while (isdigit((unsigned char) *p)) /* skip subst token */
436  p++;
437  for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
438  {
439  dst[tlen++] = src[i];
440  }
441  }
442  }
443  }
444  else
445  dst[tlen++] = *p++;
446  }
447  }
448  dst[tlen] = '\0';
449  mutt_debug(LL_DEBUG5, "subst %s\n", dst);
450  }
451  src = dst;
452  }
453 
454  if (buf)
455  mutt_str_copy(buf, dst, buflen);
456  else
457  buf = mutt_str_dup(dst);
458  return buf;
459 }
460 
465 void mutt_replacelist_free(struct ReplaceList *rl)
466 {
467  if (!rl)
468  return;
469 
470  struct Replace *np = NULL, *tmp = NULL;
471  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
472  {
473  STAILQ_REMOVE(rl, np, Replace, entries);
474  mutt_regex_free(&np->regex);
475  FREE(&np->templ);
476  FREE(&np);
477  }
478 }
479 
493 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
494 {
495  if (!rl || !buf || !str)
496  return false;
497 
498  static regmatch_t *pmatch = NULL;
499  static size_t nmatch = 0;
500  int tlen = 0;
501  char *p = NULL;
502 
503  struct Replace *np = NULL;
504  STAILQ_FOREACH(np, rl, entries)
505  {
506  /* If this pattern needs more matches, expand pmatch. */
507  if (np->nmatch > nmatch)
508  {
509  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
510  nmatch = np->nmatch;
511  }
512 
513  /* Does this pattern match? */
514  if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
515  {
516  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
517  mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
518 
519  /* Copy template into buf, with substitutions. */
520  for (p = np->templ; *p && (tlen < (buflen - 1));)
521  {
522  /* backreference to pattern match substring, eg. %1, %2, etc) */
523  if (*p == '%')
524  {
525  char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
526 
527  p++; /* skip over % char */
528  long n = strtol(p, &e, 10);
529  /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
530  * should not strictly be necessary since add_to_spam_list() finds the largest value, and
531  * the static array above is always large enough based on that value. */
532  if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
533  {
534  /* copy as much of the substring match as will fit in the output buffer, saving space for
535  * the terminating nul char */
536  for (int idx = pmatch[n].rm_so;
537  (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
538  {
539  buf[tlen++] = str[idx];
540  }
541  }
542  p = e; /* skip over the parsed integer */
543  }
544  else
545  {
546  buf[tlen++] = *p++;
547  }
548  }
549  /* tlen should always be less than buflen except when buflen<=0
550  * because the bounds checks in the above code leave room for the
551  * terminal nul char. This should avoid returning an unterminated
552  * string to the caller. When buflen<=0 we make no assumption about
553  * the validity of the buf pointer. */
554  if (tlen < buflen)
555  {
556  buf[tlen] = '\0';
557  mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
558  }
559  return true;
560  }
561  }
562 
563  return false;
564 }
565 
571 {
572  return mutt_mem_calloc(1, sizeof(struct Replace));
573 }
574 
581 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
582 {
583  if (!rl || !pat)
584  return 0;
585 
586  int nremoved = 0;
587  struct Replace *np = NULL, *tmp = NULL;
588  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
589  {
590  if (mutt_str_equal(np->regex->pattern, pat))
591  {
592  STAILQ_REMOVE(rl, np, Replace, entries);
593  mutt_regex_free(&np->regex);
594  FREE(&np->templ);
595  FREE(&np);
596  nremoved++;
597  }
598  }
599 
600  return nremoved;
601 }
602 
612 bool mutt_regex_capture(const struct Regex *regex, const char *str,
613  size_t nmatch, regmatch_t matches[])
614 {
615  if (!regex || !str || !regex->regex)
616  return false;
617 
618  int rc = regexec(regex->regex, str, nmatch, matches, 0);
619  return ((rc == 0) ^ regex->pat_not);
620 }
621 
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex which is desired to match against
 * @param str   String to search with given regex
 * @retval bool Result of mutt_regex_capture() called without a match array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const size_t no_captures = 0; /* only the yes/no answer is wanted */
  const bool result = mutt_regex_capture(regex, str, no_captures, NULL);
  return result;
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:178
Parse a number in a string.
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
General purpose object for storing and parsing strings.
#define mutt_debug(LEVEL,...)
Definition: logging.h:84
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging.h:44
@ LL_DEBUG2
Log at debug level 2.
Definition: logging.h:41
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:355
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:40
#define MIN(a, b)
Definition: memory.h:31
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:581
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:173
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:134
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:612
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:465
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:76
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:229
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:493
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:265
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:113
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:194
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:570
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:215
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:629
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:367
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:55
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:727
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:181
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:715
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:560
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:37
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:36
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:54
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
List of regular expressions.
Definition: regex3.h:100
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:101
Cached regular expression.
Definition: regex3.h:90
char * pattern
printable version
Definition: regex3.h:91
bool pat_not
do not match
Definition: regex3.h:93
regex_t * regex
compiled expression
Definition: regex3.h:92
List of regular expressions.
Definition: regex3.h:110
char * templ
Template to match.
Definition: regex3.h:113
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:112
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:111