NeoMutt  2018-07-16 +2481-68dcde
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1 
30 #include "config.h"
31 #include <ctype.h>
32 #include <regex.h>
33 #include <stdbool.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "buffer.h"
38 #include "logging.h"
39 #include "mbyte.h"
40 #include "memory.h"
41 #include "message.h"
42 #include "queue.h"
43 #include "regex3.h"
44 #include "string2.h"
45 
53 struct Regex *mutt_regex_compile(const char *str, int flags)
54 {
55  if (!str || !*str)
56  return NULL;
57  struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
58  rx->pattern = mutt_str_strdup(str);
59  rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
60  if (REG_COMP(rx->regex, str, flags) != 0)
61  mutt_regex_free(&rx);
62 
63  return rx;
64 }
65 
74 struct Regex *mutt_regex_new(const char *str, int flags, struct Buffer *err)
75 {
76  if (!str || !*str)
77  return NULL;
78 
79  int rflags = 0;
80  struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
81 
82  reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
83  reg->pattern = mutt_str_strdup(str);
84 
85  /* Should we use smart case matching? */
86  if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
87  rflags |= REG_ICASE;
88 
89  /* Is a prefix of '!' allowed? */
90  if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
91  {
92  reg->pat_not = true;
93  str++;
94  }
95 
96  int rc = REG_COMP(reg->regex, str, rflags);
97  if ((rc != 0) && err)
98  {
99  regerror(rc, reg->regex, err->data, err->dsize);
100  mutt_regex_free(&reg);
101  return NULL;
102  }
103 
104  return reg;
105 }
106 
111 void mutt_regex_free(struct Regex **r)
112 {
113  if (!r || !*r)
114  return;
115 
116  FREE(&(*r)->pattern);
117  if ((*r)->regex)
118  regfree((*r)->regex);
119  FREE(&(*r)->regex);
120  FREE(r);
121 }
122 
132 int mutt_regexlist_add(struct RegexList *rl, const char *str, int flags, struct Buffer *err)
133 {
134  if (!rl || !str || !*str)
135  return 0;
136 
137  struct Regex *rx = mutt_regex_compile(str, flags);
138  if (!rx)
139  {
140  mutt_buffer_printf(err, "Bad regex: %s\n", str);
141  return -1;
142  }
143 
144  /* check to make sure the item is not already on this rl */
145  struct RegexNode *np = NULL;
146  STAILQ_FOREACH(np, rl, entries)
147  {
148  if (mutt_str_strcasecmp(rx->pattern, np->regex->pattern) == 0)
149  break; /* already on the rl */
150  }
151 
152  if (np)
153  {
154  mutt_regex_free(&rx);
155  }
156  else
157  {
158  np = mutt_regexlist_new();
159  np->regex = rx;
160  STAILQ_INSERT_TAIL(rl, np, entries);
161  }
162 
163  return 0;
164 }
165 
170 void mutt_regexlist_free(struct RegexList *rl)
171 {
172  if (!rl)
173  return;
174 
175  struct RegexNode *np = NULL, *tmp = NULL;
176  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
177  {
178  STAILQ_REMOVE(rl, np, RegexNode, entries);
179  mutt_regex_free(&np->regex);
180  FREE(&np);
181  }
182  STAILQ_INIT(rl);
183 }
184 
191 bool mutt_regexlist_match(struct RegexList *rl, const char *str)
192 {
193  if (!rl || !str)
194  return false;
195  struct RegexNode *np = NULL;
196  STAILQ_FOREACH(np, rl, entries)
197  {
198  if (mutt_regex_match(np->regex, str))
199  {
200  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
201  return true;
202  }
203  }
204 
205  return false;
206 }
207 
213 {
214  return mutt_mem_calloc(1, sizeof(struct RegexNode));
215 }
216 
226 int mutt_regexlist_remove(struct RegexList *rl, const char *str)
227 {
228  if (!rl || !str)
229  return -1;
230 
231  if (mutt_str_strcmp("*", str) == 0)
232  {
233  mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
234  return 0;
235  }
236 
237  int rc = -1;
238  struct RegexNode *np = NULL, *tmp = NULL;
239  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
240  {
241  if (mutt_str_strcasecmp(str, np->regex->pattern) == 0)
242  {
243  STAILQ_REMOVE(rl, np, RegexNode, entries);
244  mutt_regex_free(&np->regex);
245  FREE(&np);
246  rc = 0;
247  }
248  }
249 
250  return rc;
251 }
252 
262 int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
263  const char *templ, struct Buffer *err)
264 {
265  if (!rl || !pat || !*pat || !templ)
266  return 0;
267 
268  struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
269  if (!rx)
270  {
271  if (err)
272  mutt_buffer_printf(err, _("Bad regex: %s"), pat);
273  return -1;
274  }
275 
276  /* check to make sure the item is not already on this rl */
277  struct Replace *np = NULL;
278  STAILQ_FOREACH(np, rl, entries)
279  {
280  if (mutt_str_strcasecmp(rx->pattern, np->regex->pattern) == 0)
281  {
282  /* Already on the rl. Formerly we just skipped this case, but
283  * now we're supporting removals, which means we're supporting
284  * re-adds conceptually. So we probably want this to imply a
285  * removal, then do an add. We can achieve the removal by freeing
286  * the template, and leaving t pointed at the current item. */
287  FREE(&np->templ);
288  break;
289  }
290  }
291 
292  /* If np is set, it's pointing into an extant ReplaceList* that we want to
293  * update. Otherwise we want to make a new one to link at the rl's end. */
294  if (np)
295  {
296  mutt_regex_free(&rx);
297  }
298  else
299  {
300  np = mutt_replacelist_new();
301  np->regex = rx;
302  rx = NULL;
303  STAILQ_INSERT_TAIL(rl, np, entries);
304  }
305 
306  /* Now np is the Replace that we want to modify. It is prepared. */
307  np->templ = mutt_str_strdup(templ);
308 
309  /* Find highest match number in template string */
310  np->nmatch = 0;
311  for (const char *p = templ; *p;)
312  {
313  if (*p == '%')
314  {
315  int n = 0;
316  if (mutt_str_atoi(++p, &n) < 0)
317  mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
318  if (n > np->nmatch)
319  np->nmatch = n;
320  while (*p && isdigit((int) *p))
321  p++;
322  }
323  else
324  p++;
325  }
326 
327  if (np->nmatch > np->regex->regex->re_nsub)
328  {
329  if (err)
330  mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
331  mutt_replacelist_remove(rl, pat);
332  return -1;
333  }
334 
335  np->nmatch++; /* match 0 is always the whole expr */
336  return 0;
337 }
338 
352 char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
353 {
354  static regmatch_t *pmatch = NULL;
355  static size_t nmatch = 0;
356  static char twinbuf[2][1024];
357  int switcher = 0;
358  char *p = NULL;
359  size_t cpysize, tlen;
360  char *src = NULL, *dst = NULL;
361 
362  if (buf && buflen)
363  buf[0] = '\0';
364 
365  if (!rl || !str || (*str == '\0') || (buf && !buflen))
366  return buf;
367 
368  twinbuf[0][0] = '\0';
369  twinbuf[1][0] = '\0';
370  src = twinbuf[switcher];
371  dst = src;
372 
373  mutt_str_strfcpy(src, str, 1024);
374 
375  struct Replace *np = NULL;
376  STAILQ_FOREACH(np, rl, entries)
377  {
378  /* If this pattern needs more matches, expand pmatch. */
379  if (np->nmatch > nmatch)
380  {
381  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
382  nmatch = np->nmatch;
383  }
384 
385  if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
386  {
387  tlen = 0;
388  switcher ^= 1;
389  dst = twinbuf[switcher];
390 
391  mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
392 
393  /* Copy into other twinbuf with substitutions */
394  if (np->templ)
395  {
396  for (p = np->templ; *p && (tlen < 1023);)
397  {
398  if (*p == '%')
399  {
400  p++;
401  if (*p == 'L')
402  {
403  p++;
404  cpysize = MIN(pmatch[0].rm_so, 1023 - tlen);
405  strncpy(&dst[tlen], src, cpysize);
406  tlen += cpysize;
407  }
408  else if (*p == 'R')
409  {
410  p++;
411  cpysize = MIN(strlen(src) - pmatch[0].rm_eo, 1023 - tlen);
412  strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
413  tlen += cpysize;
414  }
415  else
416  {
417  long n = strtoul(p, &p, 10); /* get subst number */
418  while (isdigit((unsigned char) *p)) /* skip subst token */
419  p++;
420  for (int i = pmatch[n].rm_so; (i < pmatch[n].rm_eo) && (tlen < 1023); i++)
421  {
422  dst[tlen++] = src[i];
423  }
424  }
425  }
426  else
427  dst[tlen++] = *p++;
428  }
429  }
430  dst[tlen] = '\0';
431  mutt_debug(LL_DEBUG5, "subst %s\n", dst);
432  }
433  src = dst;
434  }
435 
436  if (buf)
437  mutt_str_strfcpy(buf, dst, buflen);
438  else
439  buf = mutt_str_strdup(dst);
440  return buf;
441 }
442 
447 void mutt_replacelist_free(struct ReplaceList *rl)
448 {
449  if (!rl)
450  return;
451 
452  struct Replace *np = NULL, *tmp = NULL;
453  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
454  {
455  STAILQ_REMOVE(rl, np, Replace, entries);
456  mutt_regex_free(&np->regex);
457  FREE(&np->templ);
458  FREE(&np);
459  }
460 }
461 
475 bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
476 {
477  if (!rl || !buf || !str)
478  return false;
479 
480  static regmatch_t *pmatch = NULL;
481  static size_t nmatch = 0;
482  int tlen = 0;
483  char *p = NULL;
484 
485  struct Replace *np = NULL;
486  STAILQ_FOREACH(np, rl, entries)
487  {
488  /* If this pattern needs more matches, expand pmatch. */
489  if (np->nmatch > nmatch)
490  {
491  mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
492  nmatch = np->nmatch;
493  }
494 
495  /* Does this pattern match? */
496  if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
497  {
498  mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
499  mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
500 
501  /* Copy template into buf, with substitutions. */
502  for (p = np->templ; *p && tlen < buflen - 1;)
503  {
504  /* backreference to pattern match substring, eg. %1, %2, etc) */
505  if (*p == '%')
506  {
507  char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
508 
509  p++; /* skip over % char */
510  long n = strtol(p, &e, 10);
511  /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
512  * should not strictly be necessary since add_to_spam_list() finds the largest value, and
513  * the static array above is always large enough based on that value. */
514  if ((e != p) && (n >= 0) && (n <= np->nmatch) && (pmatch[n].rm_so != -1))
515  {
516  /* copy as much of the substring match as will fit in the output buffer, saving space for
517  * the terminating nul char */
518  int idx;
519  for (idx = pmatch[n].rm_so;
520  (idx < pmatch[n].rm_eo) && (tlen < buflen - 1); idx++)
521  {
522  buf[tlen++] = str[idx];
523  }
524  }
525  p = e; /* skip over the parsed integer */
526  }
527  else
528  {
529  buf[tlen++] = *p++;
530  }
531  }
532  /* tlen should always be less than buflen except when buflen<=0
533  * because the bounds checks in the above code leave room for the
534  * terminal nul char. This should avoid returning an unterminated
535  * string to the caller. When buflen<=0 we make no assumption about
536  * the validity of the buf pointer. */
537  if (tlen < buflen)
538  {
539  buf[tlen] = '\0';
540  mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
541  }
542  return true;
543  }
544  }
545 
546  return false;
547 }
548 
554 {
555  return mutt_mem_calloc(1, sizeof(struct Replace));
556 }
557 
564 int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
565 {
566  if (!rl || !pat)
567  return 0;
568 
569  int nremoved = 0;
570  struct Replace *np = NULL, *tmp = NULL;
571  STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
572  {
573  if (mutt_str_strcmp(np->regex->pattern, pat) == 0)
574  {
575  STAILQ_REMOVE(rl, np, Replace, entries);
576  mutt_regex_free(&np->regex);
577  FREE(&np->templ);
578  FREE(&np);
579  nremoved++;
580  }
581  }
582 
583  return nremoved;
584 }
585 
594 bool mutt_regex_capture(const struct Regex *regex, const char *str,
595  size_t nmatch, regmatch_t matches[])
596 {
597  if (!regex || !str || !regex->regex)
598  return false;
599 
600  int rc = regexec(regex->regex, str, nmatch, matches, 0);
601  return ((rc == 0) ^ regex->pat_not);
602 }
603 
/**
 * mutt_regex_match - Shorthand for mutt_regex_capture() without captures
 * @param regex Regex to execute
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() called with nmatch 0 and no
 *              match array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const bool result = mutt_regex_capture(regex, str, 0, NULL);
  return result;
}
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:212
char * templ
Template to match.
Definition: regex3.h:81
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:475
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:400
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:262
int mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: string.c:262
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:226
#define MIN(a, b)
Definition: memory.h:31
Memory management wrappers.
regex_t * regex
compiled expression
Definition: regex3.h:60
String manipulation buffer.
Definition: buffer.h:33
bool pat_not
do not match
Definition: regex3.h:61
#define _(a)
Definition: message.h:28
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:387
Multi-byte String manipulation functions.
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:447
List of regular expressions.
Definition: regex3.h:77
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:79
Logging Dispatcher.
struct Regex * mutt_regex_new(const char *str, int flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:74
String manipulation functions.
size_t dsize
Length of data.
Definition: buffer.h:37
Log at debug level 2.
Definition: logging.h:57
#define STAILQ_INIT(head)
Definition: queue.h:370
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:52
struct Regex * mutt_regex_compile(const char *str, int flags)
Create a Regex from a string.
Definition: regex.c:53
int mutt_regexlist_add(struct RegexList *rl, const char *str, int flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:132
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:191
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:360
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Message logging.
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:69
char * data
Pointer to data.
Definition: buffer.h:35
size_t mutt_str_strfcpy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:750
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:34
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:352
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:350
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:553
List of regular expressions.
Definition: regex3.h:67
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
match a regex against a string, with provided options
Definition: regex.c:594
General purpose object for storing and parsing strings.
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:111
char * mutt_str_strdup(const char *str)
Copy a string, safely.
Definition: string.c:380
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:564
Cached regular expression.
Definition: regex3.h:57
int mutt_str_strcasecmp(const char *a, const char *b)
Compare two strings ignoring case, safely.
Definition: string.c:628
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:610
#define FREE(x)
Definition: memory.h:40
Manage regular expressions.
#define mutt_debug(LEVEL,...)
Definition: logging.h:81
char * pattern
printable version
Definition: regex3.h:59
Log at debug level 5.
Definition: logging.h:60
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:35
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:358
int mutt_str_strcmp(const char *a, const char *b)
Compare two strings, safely.
Definition: string.c:615
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:170
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:80