NeoMutt  2023-03-22
Teaching an old dog new tricks
DOXYGEN
regex.c
Go to the documentation of this file.
1
30#include "config.h"
31#include <ctype.h>
32#include <regex.h>
33#include <stdbool.h>
34#include <stdint.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include "atoi.h"
39#include "buffer.h"
40#include "logging.h"
41#include "mbyte.h"
42#include "memory.h"
43#include "message.h"
44#include "queue.h"
45#include "regex3.h"
46#include "string2.h"
47
55struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
56{
57 if (!str || (*str == '\0'))
58 return NULL;
59 struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
60 rx->pattern = mutt_str_dup(str);
61 rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
62 if (REG_COMP(rx->regex, str, flags) != 0)
63 mutt_regex_free(&rx);
64
65 return rx;
66}
67
76struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
77{
78 if (!str || (*str == '\0'))
79 return NULL;
80
81 uint16_t rflags = 0;
82 struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
83
84 reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
85 reg->pattern = mutt_str_dup(str);
86
87 /* Should we use smart case matching? */
88 if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
89 rflags |= REG_ICASE;
90
91 /* Is a prefix of '!' allowed? */
92 if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
93 {
94 reg->pat_not = true;
95 str++;
96 }
97
98 int rc = REG_COMP(reg->regex, str, rflags);
99 if (rc != 0)
100 {
101 if (err)
102 regerror(rc, reg->regex, err->data, err->dsize);
103 mutt_regex_free(&reg);
104 return NULL;
105 }
106
107 return reg;
108}
109
114void mutt_regex_free(struct Regex **r)
115{
116 if (!r || !*r)
117 return;
118
119 FREE(&(*r)->pattern);
120 if ((*r)->regex)
121 regfree((*r)->regex);
122 FREE(&(*r)->regex);
123 FREE(r);
124}
125
135int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
136 struct Buffer *err)
137{
138 if (!rl || !str || (*str == '\0'))
139 return 0;
140
141 struct Regex *rx = mutt_regex_compile(str, flags);
142 if (!rx)
143 {
144 mutt_buffer_printf(err, "Bad regex: %s\n", str);
145 return -1;
146 }
147
148 /* check to make sure the item is not already on this rl */
149 struct RegexNode *np = NULL;
150 STAILQ_FOREACH(np, rl, entries)
151 {
152 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
153 break; /* already on the rl */
154 }
155
156 if (np)
157 {
158 mutt_regex_free(&rx);
159 }
160 else
161 {
162 np = mutt_regexlist_new();
163 np->regex = rx;
164 STAILQ_INSERT_TAIL(rl, np, entries);
165 }
166
167 return 0;
168}
169
174void mutt_regexlist_free(struct RegexList *rl)
175{
176 if (!rl)
177 return;
178
179 struct RegexNode *np = NULL, *tmp = NULL;
180 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
181 {
182 STAILQ_REMOVE(rl, np, RegexNode, entries);
184 FREE(&np);
185 }
186 STAILQ_INIT(rl);
187}
188
195bool mutt_regexlist_match(struct RegexList *rl, const char *str)
196{
197 if (!rl || !str)
198 return false;
199 struct RegexNode *np = NULL;
200 STAILQ_FOREACH(np, rl, entries)
201 {
202 if (mutt_regex_match(np->regex, str))
203 {
204 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
205 return true;
206 }
207 }
208
209 return false;
210}
211
217{
218 return mutt_mem_calloc(1, sizeof(struct RegexNode));
219}
220
230int mutt_regexlist_remove(struct RegexList *rl, const char *str)
231{
232 if (!rl || !str)
233 return -1;
234
235 if (mutt_str_equal("*", str))
236 {
237 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
238 return 0;
239 }
240
241 int rc = -1;
242 struct RegexNode *np = NULL, *tmp = NULL;
243 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
244 {
245 if (mutt_istr_equal(str, np->regex->pattern))
246 {
247 STAILQ_REMOVE(rl, np, RegexNode, entries);
249 FREE(&np);
250 rc = 0;
251 }
252 }
253
254 return rc;
255}
256
266int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
267 const char *templ, struct Buffer *err)
268{
269 if (!rl || !pat || (*pat == '\0') || !templ)
270 return 0;
271
272 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
273 if (!rx)
274 {
275 if (err)
276 mutt_buffer_printf(err, _("Bad regex: %s"), pat);
277 return -1;
278 }
279
280 /* check to make sure the item is not already on this rl */
281 struct Replace *np = NULL;
282 STAILQ_FOREACH(np, rl, entries)
283 {
284 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
285 {
286 /* Already on the rl. Formerly we just skipped this case, but
287 * now we're supporting removals, which means we're supporting
288 * re-adds conceptually. So we probably want this to imply a
289 * removal, then do an add. We can achieve the removal by freeing
290 * the template, and leaving t pointed at the current item. */
291 FREE(&np->templ);
292 break;
293 }
294 }
295
296 /* If np is set, it's pointing into an extant ReplaceList* that we want to
297 * update. Otherwise we want to make a new one to link at the rl's end. */
298 if (np)
299 {
300 mutt_regex_free(&rx);
301 }
302 else
303 {
305 np->regex = rx;
306 rx = NULL;
307 STAILQ_INSERT_TAIL(rl, np, entries);
308 }
309
310 /* Now np is the Replace that we want to modify. It is prepared. */
311 np->templ = mutt_str_dup(templ);
312
313 /* Find highest match number in template string */
314 np->nmatch = 0;
315 for (const char *p = templ; *p;)
316 {
317 if (*p == '%')
318 {
319 int n = 0;
320 const char *end = mutt_str_atoi(++p, &n);
321 if (!end)
322 {
323 // this is not an error, we might have matched %R or %L in subjectrx
324 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
325 }
326 if (n > np->nmatch)
327 {
328 np->nmatch = n;
329 }
330 if (end)
331 {
332 p = end;
333 }
334 else
335 {
336 p++;
337 }
338 }
339 else
340 p++;
341 }
342
343 if (np->nmatch > np->regex->regex->re_nsub)
344 {
345 if (err)
346 mutt_buffer_printf(err, "%s", _("Not enough subexpressions for template"));
348 return -1;
349 }
350
351 np->nmatch++; /* match 0 is always the whole expr */
352 return 0;
353}
354
368char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
369{
370 static regmatch_t *pmatch = NULL;
371 static size_t nmatch = 0;
372 static char twinbuf[2][1024];
373 int switcher = 0;
374 char *p = NULL;
375 size_t cpysize, tlen;
376 char *src = NULL, *dst = NULL;
377
378 if (buf && (buflen != 0))
379 buf[0] = '\0';
380
381 if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
382 return buf;
383
384 twinbuf[0][0] = '\0';
385 twinbuf[1][0] = '\0';
386 src = twinbuf[switcher];
387 dst = src;
388
389 mutt_str_copy(src, str, sizeof(*twinbuf));
390
391 struct Replace *np = NULL;
392 STAILQ_FOREACH(np, rl, entries)
393 {
394 /* If this pattern needs more matches, expand pmatch. */
395 if (np->nmatch > nmatch)
396 {
397 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
398 nmatch = np->nmatch;
399 }
400
401 if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
402 {
403 tlen = 0;
404 switcher ^= 1;
405 dst = twinbuf[switcher];
406
407 mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
408
409 /* Copy into other twinbuf with substitutions */
410 if (np->templ)
411 {
412 for (p = np->templ; *p && (tlen < (sizeof(*twinbuf) - 1));)
413 {
414 if (*p == '%')
415 {
416 p++;
417 if (*p == 'L')
418 {
419 p++;
420 cpysize = MIN(pmatch[0].rm_so, (sizeof(*twinbuf) - 1) - tlen);
421 strncpy(&dst[tlen], src, cpysize);
422 tlen += cpysize;
423 }
424 else if (*p == 'R')
425 {
426 p++;
427 cpysize = MIN(strlen(src) - pmatch[0].rm_eo, (sizeof(*twinbuf) - 1) - tlen);
428 strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
429 tlen += cpysize;
430 }
431 else
432 {
433 long n = strtoul(p, &p, 10); /* get subst number */
434 if (n < np->nmatch)
435 {
436 while (isdigit((unsigned char) *p)) /* skip subst token */
437 p++;
438 for (int i = pmatch[n].rm_so;
439 (i < pmatch[n].rm_eo) && (tlen < (sizeof(*twinbuf) - 1)); i++)
440 {
441 dst[tlen++] = src[i];
442 }
443 }
444 }
445 }
446 else
447 dst[tlen++] = *p++;
448 }
449 }
450 dst[tlen] = '\0';
451 mutt_debug(LL_DEBUG5, "subst %s\n", dst);
452 }
453 src = dst;
454 }
455
456 if (buf)
457 mutt_str_copy(buf, dst, buflen);
458 else
459 buf = mutt_str_dup(dst);
460 return buf;
461}
462
467void mutt_replacelist_free(struct ReplaceList *rl)
468{
469 if (!rl)
470 return;
471
472 struct Replace *np = NULL, *tmp = NULL;
473 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
474 {
475 STAILQ_REMOVE(rl, np, Replace, entries);
477 FREE(&np->templ);
478 FREE(&np);
479 }
480}
481
495bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
496{
497 if (!rl || !buf || !str)
498 return false;
499
500 static regmatch_t *pmatch = NULL;
501 static size_t nmatch = 0;
502 int tlen = 0;
503 char *p = NULL;
504
505 struct Replace *np = NULL;
506 STAILQ_FOREACH(np, rl, entries)
507 {
508 /* If this pattern needs more matches, expand pmatch. */
509 if (np->nmatch > nmatch)
510 {
511 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
512 nmatch = np->nmatch;
513 }
514
515 /* Does this pattern match? */
516 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
517 {
518 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
519 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
520
521 /* Copy template into buf, with substitutions. */
522 for (p = np->templ; *p && (tlen < (buflen - 1));)
523 {
524 /* backreference to pattern match substring, eg. %1, %2, etc) */
525 if (*p == '%')
526 {
527 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
528
529 p++; /* skip over % char */
530 long n = strtol(p, &e, 10);
531 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
532 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
533 * the static array above is always large enough based on that value. */
534 if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
535 {
536 /* copy as much of the substring match as will fit in the output buffer, saving space for
537 * the terminating nul char */
538 for (int idx = pmatch[n].rm_so;
539 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
540 {
541 buf[tlen++] = str[idx];
542 }
543 }
544 p = e; /* skip over the parsed integer */
545 }
546 else
547 {
548 buf[tlen++] = *p++;
549 }
550 }
551 /* tlen should always be less than buflen except when buflen<=0
552 * because the bounds checks in the above code leave room for the
553 * terminal nul char. This should avoid returning an unterminated
554 * string to the caller. When buflen<=0 we make no assumption about
555 * the validity of the buf pointer. */
556 if (tlen < buflen)
557 {
558 buf[tlen] = '\0';
559 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
560 }
561 return true;
562 }
563 }
564
565 return false;
566}
567
573{
574 return mutt_mem_calloc(1, sizeof(struct Replace));
575}
576
583int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
584{
585 if (!rl || !pat)
586 return 0;
587
588 int nremoved = 0;
589 struct Replace *np = NULL, *tmp = NULL;
590 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
591 {
592 if (mutt_str_equal(np->regex->pattern, pat))
593 {
594 STAILQ_REMOVE(rl, np, Replace, entries);
596 FREE(&np->templ);
597 FREE(&np);
598 nremoved++;
599 }
600 }
601
602 return nremoved;
603}
604
614bool mutt_regex_capture(const struct Regex *regex, const char *str,
615 size_t nmatch, regmatch_t matches[])
616{
617 if (!regex || !str || !regex->regex)
618 return false;
619
620 int rc = regexec(regex->regex, str, nmatch, matches, 0);
621 return ((rc == 0) ^ regex->pat_not);
622}
623
/**
 * mutt_regex_match - Test a string against a Regex without capturing
 * @param regex Regex to use
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() called with no match array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const bool matched = mutt_regex_capture(regex, str, 0, NULL);
  return matched;
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:179
Parse a number in a string.
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:168
General purpose object for storing and parsing strings.
#define mutt_debug(LEVEL,...)
Definition: logging.h:84
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging.h:44
@ LL_DEBUG2
Log at debug level 2.
Definition: logging.h:41
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:355
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
#define MIN(a, b)
Definition: memory.h:31
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:583
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:76
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition: regex.c:216 (note: returns a single RegexNode, i.e. one list entry, not a whole RegexList)
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:368
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:55
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:174
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:135
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:614
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:467
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:230
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:495
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition: regex.c:572 (note: returns a single Replace entry, not a whole ReplaceList)
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:266
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:114
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:195
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:631
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:819
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:807
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:652
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:36
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:35
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
List of regular expressions.
Definition: regex3.h:99
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:100
Cached regular expression.
Definition: regex3.h:89
char * pattern
printable version
Definition: regex3.h:90
bool pat_not
do not match
Definition: regex3.h:92
regex_t * regex
compiled expression
Definition: regex3.h:91
List of regular expressions.
Definition: regex3.h:109
char * templ
Template to match.
Definition: regex3.h:112
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:111
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:110