NeoMutt  2022-04-29-145-g9b6a0e
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1 
30 #include "config.h"
31 #include <ctype.h>
32 #include <regex.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include "atoi.h"
39 #include "buffer.h"
40 #include "logging.h"
41 #include "mbyte.h"
42 #include "memory.h"
43 #include "message.h"
44 #include "queue.h"
45 #include "regex3.h"
46 #include "string2.h"
47 
55 struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
56 {
57  if (!str || (*str == '\0'))
58  return NULL;
59  struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
60  rx->pattern = mutt_str_dup(str);
61  rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
62  if (REG_COMP(rx->regex, str, flags) != 0)
63  mutt_regex_free(&rx);
64 
65  return rx;
66 }
67 
76 struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
77 {
78  if (!str || (*str == '\0'))
79  return NULL;
80 
81  uint16_t rflags = 0;
82  struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
83 
84  reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
85  reg->pattern = mutt_str_dup(str);
86 
87  /* Should we use smart case matching? */
88  if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
89  rflags |= REG_ICASE;
90 
91  /* Is a prefix of '!' allowed? */
92  if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
93  {
94  reg->pat_not = true;
95  str++;
96  }
97 
98  int rc = REG_COMP(reg->regex, str, rflags);
99  if (rc != 0)
100  {
101  if (err)
102  regerror(rc, reg->regex, err->data, err->dsize);
103  mutt_regex_free(&reg);
104  return NULL;
105  }
106 
107  return reg;
108 }
109 
114 void mutt_regex_free(struct Regex **r)
115 {
116  if (!r || !*r)
117  return;
118 
119  FREE(&(*r)->pattern);
120  if ((*r)->regex)
121  regfree((*r)->regex);
122  FREE(&(*r)->regex);
123  FREE(r);
124 }
125 
135 int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
136  struct Buffer *err)
137 {
138  if (!rl || !str || (*str == '\0'))
139  return 0;
140 
141  struct Regex *rx = mutt_regex_compile(str, flags);
142  if (!rx)
143  {
144  mutt_buffer_printf(err, "Bad regex: %s\n", str);
145  return -1;
146  }
147 
148  /* check to make sure the item is not already on this rl */
149  struct RegexNode *np = NULL;
150  STAILQ_FOREACH(np, rl, entries)
151  {
152  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
153  break; /* already on the rl */
154  }
155 
156  if (np)
157  {
158  mutt_regex_free(&rx);
159  }
160  else
161  {
162  np = mutt_regexlist_new();
163  np->regex = rx;
164  STAILQ_INSERT_TAIL(rl, np, entries);
165  }
166 
167  return 0;
168 }
169 
174 void mutt_regexlist_free(struct RegexList *rl)
175 {
176  if (!rl)
177  return;
178 
179  struct RegexNode *np = NULL, *tmp = NULL;
180  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
181  {
182  STAILQ_REMOVE(rl, np, RegexNode, entries);
183  mutt_regex_free(&np->regex);
184  FREE(&np);
185  }
186  STAILQ_INIT(rl);
187 }
188 
195 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
196 {
197  if (!rl || !str)
198  return false;
199  struct RegexNode *np = NULL;
200  STAILQ_FOREACH(np, rl, entries)
201  {
202  if (mutt_regex_match(np->regex, str))
203  {
204  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
205  return true;
206  }
207  }
208 
209  return false;
210 }
211 
217 {
218  return mutt_mem_calloc(1, sizeof(struct RegexNode));
219 }
220 
230 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
231 {
232  if (!rl || !str)
233  return -1;
234 
235  if (mutt_str_equal("*", str))
236  {
237  mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
238  return 0;
239  }
240 
241  int rc = -1;
242  struct RegexNode *np = NULL, *tmp = NULL;
243  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
244  {
245  if (mutt_istr_equal(str, np->regex->pattern))
246  {
247  STAILQ_REMOVE(rl, np, RegexNode, entries);
248  mutt_regex_free(&np->regex);
249  FREE(&np);
250  rc = 0;
251  }
252  }
253 
254  return rc;
255 }
256 
266 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
267  const char *templ, struct Buffer *err)
268 {
269  if (!rl || !pat || (*pat == '\0') || !templ)
270  return 0;
271 
272  struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
273  if (!rx)
274  {
275  if (err)
276  mutt_buffer_printf(err, _("Bad regex: %s"), pat);
277  return -1;
278  }
279 
280  /* check to make sure the item is not already on this rl */
281  struct Replace *np = NULL;
282  STAILQ_FOREACH(np, rl, entries)
283  {
284  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
285  {
286  /* Already on the rl. Formerly we just skipped this case, but
287  * now we're supporting removals, which means we're supporting
288  * re-adds conceptually. So we probably want this to imply a
289  * removal, then do an add. We can achieve the removal by freeing
290  * the template, and leaving t pointed at the current item. */
291  FREE(&np->templ);
292  break;
293  }
294  }
295 
296  /* If np is set, it's pointing into an extant ReplaceList* that we want to
297  * update. Otherwise we want to make a new one to link at the rl's end. */
298  if (np)
299  {
300  mutt_regex_free(&rx);
301  }
302  else
303  {
304  np = mutt_replacelist_new();
305  np->regex = rx;
306  rx = NULL;
307  STAILQ_INSERT_TAIL(rl, np, entries);
308  }
309 
310  /* Now np is the Replace that we want to modify. It is prepared. */
311  np->templ = mutt_str_dup(templ);
312 
313  /* Find highest match number in template string */
314  np->nmatch = 0;
315  for (const char *p = templ; *p;)
316  {
317  if (*p == '%')
318  {
319  int n = 0;
320  const char *end = mutt_str_atoi(++p, &n);
321  if (!end)
322  {
323  // this is not an error, we might have matched %R or %L in subjectrx
324  mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
325  }
326  if (n > np->nmatch)
327  {
328  np->nmatch = n;
329  }
330  if (end)
331  {
332  p = end;
333  }
334  else
335  {
336  p++;
337  }
338  }
339  else
340  p++;
341  }
342 
343  if (np->nmatch > np->regex->regex->re_nsub)
344  {
345  if (err)
346  mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
347  mutt_replacelist_remove(rl, pat);
348  return -1;
349  }
350 
351  np->nmatch++; /* match 0 is always the whole expr */
352  return 0;
353 }
354 
368 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
369 {
370  static regmatch_t *pmatch = NULL;
371  static size_t nmatch = 0;
372  static char twinbuf[2][1024];
373  int switcher = 0;
374  char *p = NULL;
375  size_t cpysize, tlen;
376  char *src = NULL, *dst = NULL;
377 
378  if (buf && (buflen != 0))
379  buf[0] = '\0';
380 
381  if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
382  return buf;
383 
384  twinbuf[0][0] = '\0';
385  twinbuf[1][0] = '\0';
386  src = twinbuf[switcher];
387  dst = src;
388 
389  mutt_str_copy(src, str, sizeof(*twinbuf));
390 
391  struct Replace *np = NULL;
392  STAILQ_FOREACH(np, rl, entries)
393  {
394  /* If this pattern needs more matches, expand pmatch. */
395  if (np->nmatch > nmatch)
396  {
397  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
398  nmatch = np->nmatch;
399  }
400 
401  if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
402  {
403  tlen = 0;
404  switcher ^= 1;
405  dst = twinbuf[switcher];
406 
407  mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
408 
409  /* Copy into other twinbuf with substitutions */
410  if (np->templ)
411  {
412  for (p = np->templ; *p && (tlen < (sizeof(*twinbuf) - 1));)
413  {
414  if (*p == '%')
415  {
416  p++;
417  if (*p == 'L')
418  {
419  p++;
420  cpysize = MIN(pmatch[0].rm_so, (sizeof(*twinbuf) - 1) - tlen);
421  strncpy(&dst[tlen], src, cpysize);
422  tlen += cpysize;
423  }
424  else if (*p == 'R')
425  {
426  p++;
427  cpysize = MIN(strlen(src) - pmatch[0].rm_eo, (sizeof(*twinbuf) - 1) - tlen);
428  strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
429  tlen += cpysize;
430  }
431  else
432  {
433  long n = strtoul(p, &p, 10); /* get subst number */
434  if (n < np->nmatch)
435  {
436  while (isdigit((unsigned char) *p)) /* skip subst token */
437  p++;
438  for (int i = pmatch[n].rm_so;
439  (i < pmatch[n].rm_eo) && (tlen < (sizeof(*twinbuf) - 1)); i++)
440  {
441  dst[tlen++] = src[i];
442  }
443  }
444  }
445  }
446  else
447  dst[tlen++] = *p++;
448  }
449  }
450  dst[tlen] = '\0';
451  mutt_debug(LL_DEBUG5, "subst %s\n", dst);
452  }
453  src = dst;
454  }
455 
456  if (buf)
457  mutt_str_copy(buf, dst, buflen);
458  else
459  buf = mutt_str_dup(dst);
460  return buf;
461 }
462 
467 void mutt_replacelist_free(struct ReplaceList *rl)
468 {
469  if (!rl)
470  return;
471 
472  struct Replace *np = NULL, *tmp = NULL;
473  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
474  {
475  STAILQ_REMOVE(rl, np, Replace, entries);
476  mutt_regex_free(&np->regex);
477  FREE(&np->templ);
478  FREE(&np);
479  }
480 }
481 
495 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
496 {
497  if (!rl || !buf || !str)
498  return false;
499 
500  static regmatch_t *pmatch = NULL;
501  static size_t nmatch = 0;
502  int tlen = 0;
503  char *p = NULL;
504 
505  struct Replace *np = NULL;
506  STAILQ_FOREACH(np, rl, entries)
507  {
508  /* If this pattern needs more matches, expand pmatch. */
509  if (np->nmatch > nmatch)
510  {
511  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
512  nmatch = np->nmatch;
513  }
514 
515  /* Does this pattern match? */
516  if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
517  {
518  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
519  mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
520 
521  /* Copy template into buf, with substitutions. */
522  for (p = np->templ; *p && (tlen < (buflen - 1));)
523  {
524  /* backreference to pattern match substring, eg. %1, %2, etc) */
525  if (*p == '%')
526  {
527  char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
528 
529  p++; /* skip over % char */
530  long n = strtol(p, &e, 10);
531  /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
532  * should not strictly be necessary since add_to_spam_list() finds the largest value, and
533  * the static array above is always large enough based on that value. */
534  if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
535  {
536  /* copy as much of the substring match as will fit in the output buffer, saving space for
537  * the terminating nul char */
538  for (int idx = pmatch[n].rm_so;
539  (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
540  {
541  buf[tlen++] = str[idx];
542  }
543  }
544  p = e; /* skip over the parsed integer */
545  }
546  else
547  {
548  buf[tlen++] = *p++;
549  }
550  }
551  /* tlen should always be less than buflen except when buflen<=0
552  * because the bounds checks in the above code leave room for the
553  * terminal nul char. This should avoid returning an unterminated
554  * string to the caller. When buflen<=0 we make no assumption about
555  * the validity of the buf pointer. */
556  if (tlen < buflen)
557  {
558  buf[tlen] = '\0';
559  mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
560  }
561  return true;
562  }
563  }
564 
565  return false;
566 }
567 
573 {
574  return mutt_mem_calloc(1, sizeof(struct Replace));
575 }
576 
583 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
584 {
585  if (!rl || !pat)
586  return 0;
587 
588  int nremoved = 0;
589  struct Replace *np = NULL, *tmp = NULL;
590  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
591  {
592  if (mutt_str_equal(np->regex->pattern, pat))
593  {
594  STAILQ_REMOVE(rl, np, Replace, entries);
595  mutt_regex_free(&np->regex);
596  FREE(&np->templ);
597  FREE(&np);
598  nremoved++;
599  }
600  }
601 
602  return nremoved;
603 }
604 
614 bool mutt_regex_capture(const struct Regex *regex, const char *str,
615  size_t nmatch, regmatch_t matches[])
616 {
617  if (!regex || !str || !regex->regex)
618  return false;
619 
620  int rc = regexec(regex->regex, str, nmatch, matches, 0);
621  return ((rc == 0) ^ regex->pat_not);
622 }
623 
/**
 * mutt_regex_match - Test a string against a Regex without capturing anything
 * @param regex Regex to use
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() called with no match slots
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const size_t no_slots = 0;
  return mutt_regex_capture(regex, str, no_slots, NULL);
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:178
Parse a number in a string.
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:158
General purpose object for storing and parsing strings.
#define mutt_debug(LEVEL,...)
Definition: logging.h:84
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging.h:44
@ LL_DEBUG2
Log at debug level 2.
Definition: logging.h:41
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:355
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
#define MIN(a, b)
Definition: memory.h:31
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:583
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:174
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:135
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:614
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:467
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create an Regex from a string.
Definition: regex.c:76
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:230
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:495
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:266
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:114
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:195
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:572
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:216
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:631
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:368
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create an Regex from a string.
Definition: regex.c:55
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:796
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:784
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:629
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:36
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:35
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
List of regular expressions.
Definition: regex3.h:99
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:100
Cached regular expression.
Definition: regex3.h:89
char * pattern
printable version
Definition: regex3.h:90
bool pat_not
do not match
Definition: regex3.h:92
regex_t * regex
compiled expression
Definition: regex3.h:91
List of regular expressions.
Definition: regex3.h:109
char * templ
Template to match.
Definition: regex3.h:112
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:111
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:110