NeoMutt  2020-09-25
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1 
30 #include "config.h"
31 #include <ctype.h>
32 #include <stdbool.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include "buffer.h"
37 #include "logging.h"
38 #include "mbyte.h"
39 #include "memory.h"
40 #include "message.h"
41 #include "queue.h"
42 #include "regex3.h"
43 #include "string2.h"
44 
52 struct Regex *mutt_regex_compile(const char *str, int flags)
53 {
54  if (!str || (*str == '\0'))
55  return NULL;
56  struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
57  rx->pattern = mutt_str_dup(str);
58  rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
59  if (REG_COMP(rx->regex, str, flags) != 0)
60  mutt_regex_free(&rx);
61 
62  return rx;
63 }
64 
73 struct Regex *mutt_regex_new(const char *str, int flags, struct Buffer *err)
74 {
75  if (!str || (*str == '\0'))
76  return NULL;
77 
78  int rflags = 0;
79  struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
80 
81  reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
82  reg->pattern = mutt_str_dup(str);
83 
84  /* Should we use smart case matching? */
85  if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
86  rflags |= REG_ICASE;
87 
88  /* Is a prefix of '!' allowed? */
89  if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
90  {
91  reg->pat_not = true;
92  str++;
93  }
94 
95  int rc = REG_COMP(reg->regex, str, rflags);
96  if ((rc != 0) && err)
97  {
98  regerror(rc, reg->regex, err->data, err->dsize);
99  mutt_regex_free(&reg);
100  return NULL;
101  }
102 
103  return reg;
104 }
105 
110 void mutt_regex_free(struct Regex **r)
111 {
112  if (!r || !*r)
113  return;
114 
115  FREE(&(*r)->pattern);
116  if ((*r)->regex)
117  regfree((*r)->regex);
118  FREE(&(*r)->regex);
119  FREE(r);
120 }
121 
131 int mutt_regexlist_add(struct RegexList *rl, const char *str, int flags, struct Buffer *err)
132 {
133  if (!rl || !str || (*str == '\0'))
134  return 0;
135 
136  struct Regex *rx = mutt_regex_compile(str, flags);
137  if (!rx)
138  {
139  mutt_buffer_printf(err, "Bad regex: %s\n", str);
140  return -1;
141  }
142 
143  /* check to make sure the item is not already on this rl */
144  struct RegexNode *np = NULL;
145  STAILQ_FOREACH(np, rl, entries)
146  {
147  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
148  break; /* already on the rl */
149  }
150 
151  if (np)
152  {
153  mutt_regex_free(&rx);
154  }
155  else
156  {
157  np = mutt_regexlist_new();
158  np->regex = rx;
159  STAILQ_INSERT_TAIL(rl, np, entries);
160  }
161 
162  return 0;
163 }
164 
169 void mutt_regexlist_free(struct RegexList *rl)
170 {
171  if (!rl)
172  return;
173 
174  struct RegexNode *np = NULL, *tmp = NULL;
175  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
176  {
177  STAILQ_REMOVE(rl, np, RegexNode, entries);
178  mutt_regex_free(&np->regex);
179  FREE(&np);
180  }
181  STAILQ_INIT(rl);
182 }
183 
190 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
191 {
192  if (!rl || !str)
193  return false;
194  struct RegexNode *np = NULL;
195  STAILQ_FOREACH(np, rl, entries)
196  {
197  if (mutt_regex_match(np->regex, str))
198  {
199  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
200  return true;
201  }
202  }
203 
204  return false;
205 }
206 
212 {
213  return mutt_mem_calloc(1, sizeof(struct RegexNode));
214 }
215 
225 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
226 {
227  if (!rl || !str)
228  return -1;
229 
230  if (mutt_str_equal("*", str))
231  {
232  mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
233  return 0;
234  }
235 
236  int rc = -1;
237  struct RegexNode *np = NULL, *tmp = NULL;
238  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
239  {
240  if (mutt_istr_equal(str, np->regex->pattern))
241  {
242  STAILQ_REMOVE(rl, np, RegexNode, entries);
243  mutt_regex_free(&np->regex);
244  FREE(&np);
245  rc = 0;
246  }
247  }
248 
249  return rc;
250 }
251 
261 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
262  const char *templ, struct Buffer *err)
263 {
264  if (!rl || !pat || (*pat == '\0') || !templ)
265  return 0;
266 
267  struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
268  if (!rx)
269  {
270  if (err)
271  mutt_buffer_printf(err, _("Bad regex: %s"), pat);
272  return -1;
273  }
274 
275  /* check to make sure the item is not already on this rl */
276  struct Replace *np = NULL;
277  STAILQ_FOREACH(np, rl, entries)
278  {
279  if (mutt_istr_equal(rx->pattern, np->regex->pattern))
280  {
281  /* Already on the rl. Formerly we just skipped this case, but
282  * now we're supporting removals, which means we're supporting
283  * re-adds conceptually. So we probably want this to imply a
284  * removal, then do an add. We can achieve the removal by freeing
285  * the template, and leaving t pointed at the current item. */
286  FREE(&np->templ);
287  break;
288  }
289  }
290 
291  /* If np is set, it's pointing into an extant ReplaceList* that we want to
292  * update. Otherwise we want to make a new one to link at the rl's end. */
293  if (np)
294  {
295  mutt_regex_free(&rx);
296  }
297  else
298  {
299  np = mutt_replacelist_new();
300  np->regex = rx;
301  rx = NULL;
302  STAILQ_INSERT_TAIL(rl, np, entries);
303  }
304 
305  /* Now np is the Replace that we want to modify. It is prepared. */
306  np->templ = mutt_str_dup(templ);
307 
308  /* Find highest match number in template string */
309  np->nmatch = 0;
310  for (const char *p = templ; *p;)
311  {
312  if (*p == '%')
313  {
314  int n = 0;
315  if (mutt_str_atoi(++p, &n) < 0)
316  mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
317  if (n > np->nmatch)
318  np->nmatch = n;
319  while (*p && isdigit((int) *p))
320  p++;
321  }
322  else
323  p++;
324  }
325 
326  if (np->nmatch > np->regex->regex->re_nsub)
327  {
328  if (err)
329  mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
330  mutt_replacelist_remove(rl, pat);
331  return -1;
332  }
333 
334  np->nmatch++; /* match 0 is always the whole expr */
335  return 0;
336 }
337 
351 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
352 {
353  static regmatch_t *pmatch = NULL;
354  static size_t nmatch = 0;
355  static char twinbuf[2][1024];
356  int switcher = 0;
357  char *p = NULL;
358  size_t cpysize, tlen;
359  char *src = NULL, *dst = NULL;
360 
361  if (buf && (buflen != 0))
362  buf[0] = '\0';
363 
364  if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
365  return buf;
366 
367  twinbuf[0][0] = '\0';
368  twinbuf[1][0] = '\0';
369  src = twinbuf[switcher];
370  dst = src;
371 
372  mutt_str_copy(src, str, 1024);
373 
374  struct Replace *np = NULL;
375  STAILQ_FOREACH(np, rl, entries)
376  {
377  /* If this pattern needs more matches, expand pmatch. */
378  if (np->nmatch > nmatch)
379  {
380  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
381  nmatch = np->nmatch;
382  }
383 
384  if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
385  {
386  tlen = 0;
387  switcher ^= 1;
388  dst = twinbuf[switcher];
389 
390  mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
391 
392  /* Copy into other twinbuf with substitutions */
393  if (np->templ)
394  {
395  for (p = np->templ; *p && (tlen < 1023);)
396  {
397  if (*p == '%')
398  {
399  p++;
400  if (*p == 'L')
401  {
402  p++;
403  cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
404  strncpy(&dst[tlen], src, cpysize);
405  tlen += cpysize;
406  }
407  else if (*p == 'R')
408  {
409  p++;
410  cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
411  strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
412  tlen += cpysize;
413  }
414  else
415  {
416  long n = strtoul(p, &p, 10); /* get subst number */
417  while (isdigit((unsigned char) *p)) /* skip subst token */
418  p++;
419  for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
420  {
421  dst[tlen++] = src[i];
422  }
423  }
424  }
425  else
426  dst[tlen++] = *p++;
427  }
428  }
429  dst[tlen] = '\0';
430  mutt_debug(LL_DEBUG5, "subst %s\n", dst);
431  }
432  src = dst;
433  }
434 
435  if (buf)
436  mutt_str_copy(buf, dst, buflen);
437  else
438  buf = mutt_str_dup(dst);
439  return buf;
440 }
441 
446 void mutt_replacelist_free(struct ReplaceList *rl)
447 {
448  if (!rl)
449  return;
450 
451  struct Replace *np = NULL, *tmp = NULL;
452  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
453  {
454  STAILQ_REMOVE(rl, np, Replace, entries);
455  mutt_regex_free(&np->regex);
456  FREE(&np->templ);
457  FREE(&np);
458  }
459 }
460 
474 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
475 {
476  if (!rl || !buf || !str)
477  return false;
478 
479  static regmatch_t *pmatch = NULL;
480  static size_t nmatch = 0;
481  int tlen = 0;
482  char *p = NULL;
483 
484  struct Replace *np = NULL;
485  STAILQ_FOREACH(np, rl, entries)
486  {
487  /* If this pattern needs more matches, expand pmatch. */
488  if (np->nmatch > nmatch)
489  {
490  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
491  nmatch = np->nmatch;
492  }
493 
494  /* Does this pattern match? */
495  if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
496  {
497  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
498  mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
499 
500  /* Copy template into buf, with substitutions. */
501  for (p = np->templ; *p && (tlen < (buflen - 1));)
502  {
503  /* backreference to pattern match substring, eg. %1, %2, etc) */
504  if (*p == '%')
505  {
506  char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
507 
508  p++; /* skip over % char */
509  long n = strtol(p, &e, 10);
510  /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
511  * should not strictly be necessary since add_to_spam_list() finds the largest value, and
512  * the static array above is always large enough based on that value. */
513  if ((e != p) && (n >= 0) && (n <= np->nmatch) && (pmatch[n].rm_so != -1))
514  {
515  /* copy as much of the substring match as will fit in the output buffer, saving space for
516  * the terminating nul char */
517  int idx;
518  for (idx = pmatch[n].rm_so;
519  (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
520  {
521  buf[tlen++] = str[idx];
522  }
523  }
524  p = e; /* skip over the parsed integer */
525  }
526  else
527  {
528  buf[tlen++] = *p++;
529  }
530  }
531  /* tlen should always be less than buflen except when buflen<=0
532  * because the bounds checks in the above code leave room for the
533  * terminal nul char. This should avoid returning an unterminated
534  * string to the caller. When buflen<=0 we make no assumption about
535  * the validity of the buf pointer. */
536  if (tlen < buflen)
537  {
538  buf[tlen] = '\0';
539  mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
540  }
541  return true;
542  }
543  }
544 
545  return false;
546 }
547 
553 {
554  return mutt_mem_calloc(1, sizeof(struct Replace));
555 }
556 
563 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
564 {
565  if (!rl || !pat)
566  return 0;
567 
568  int nremoved = 0;
569  struct Replace *np = NULL, *tmp = NULL;
570  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
571  {
572  if (mutt_str_equal(np->regex->pattern, pat))
573  {
574  STAILQ_REMOVE(rl, np, Replace, entries);
575  mutt_regex_free(&np->regex);
576  FREE(&np->templ);
577  FREE(&np);
578  nremoved++;
579  }
580  }
581 
582  return nremoved;
583 }
584 
593 bool mutt_regex_capture(const struct Regex *regex, const char *str,
594  size_t nmatch, regmatch_t matches[])
595 {
596  if (!regex || !str || !regex->regex)
597  return false;
598 
599  int rc = regexec(regex->regex, str, nmatch, matches, 0);
600  return ((rc == 0) ^ regex->pat_not);
601 }
602 
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex to execute
 * @param str   String to apply the regex to
 * @retval bool Result of mutt_regex_capture() called without a match array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  /* No offsets wanted: zero match slots and no array to fill */
  const bool matched = mutt_regex_capture(regex, str, 0, NULL);
  return matched;
}
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:871
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:211
char * templ
Template to match.
Definition: regex3.h:112
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:474
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:399
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:261
int mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: string.c:252
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:225
#define MIN(a, b)
Definition: memory.h:31
Memory management wrappers.
regex_t * regex
compiled expression
Definition: regex3.h:91
String manipulation buffer.
Definition: buffer.h:33
bool pat_not
do not match
Definition: regex3.h:92
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:35
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:370
#define _(a)
Definition: message.h:28
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:386
Multi-byte String manipulation functions.
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:446
List of regular expressions.
Definition: regex3.h:108
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:110
Logging Dispatcher.
struct Regex * mutt_regex_new(const char *str, int flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:73
String manipulation functions.
size_t dsize
Length of data.
Definition: buffer.h:37
Log at debug level 2.
Definition: logging.h:41
#define STAILQ_INIT(head)
Definition: queue.h:369
struct Regex * mutt_regex_compile(const char *str, int flags)
Create a Regex from a string.
Definition: regex.c:52
int mutt_regexlist_add(struct RegexList *rl, const char *str, int flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:131
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:190
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:359
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:883
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Message logging.
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:100
char * data
Pointer to data.
Definition: buffer.h:35
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:351
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:349
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:552
List of regular expressions.
Definition: regex3.h:98
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
match a regex against a string, with provided options
Definition: regex.c:593
General purpose object for storing and parsing strings.
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:110
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:716
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:563
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:36
Cached regular expression.
Definition: regex3.h:88
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:609
#define FREE(x)
Definition: memory.h:40
#define mutt_debug(LEVEL,...)
Definition: logging.h:81
char * pattern
printable version
Definition: regex3.h:90
Log at debug level 5.
Definition: logging.h:44
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:358
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:169
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:111
Manage regular expressions.