NeoMutt  2024-02-01-35-geee02f
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1
33#include "config.h"
34#include <ctype.h>
35#include <regex.h>
36#include <stdbool.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include "config/types.h"
42#include "atoi.h"
43#include "buffer.h"
44#include "logging2.h"
45#include "mbyte.h"
46#include "memory.h"
47#include "message.h"
48#include "queue.h"
49#include "regex3.h"
50#include "string2.h"
51
59struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
60{
61 if (!str || (*str == '\0'))
62 return NULL;
63 struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
64 rx->pattern = mutt_str_dup(str);
65 rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
66 if (REG_COMP(rx->regex, str, flags) != 0)
67 mutt_regex_free(&rx);
68
69 return rx;
70}
71
80struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
81{
82 if (!str || (*str == '\0'))
83 return NULL;
84
85 uint16_t rflags = 0;
86 struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
87
88 reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
89 reg->pattern = mutt_str_dup(str);
90
91 /* Should we use smart case matching? */
92 if (((flags & D_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
93 rflags |= REG_ICASE;
94
95 /* Is a prefix of '!' allowed? */
96 if (((flags & D_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
97 {
98 reg->pat_not = true;
99 str++;
100 }
101
102 int rc = REG_COMP(reg->regex, str, rflags);
103 if (rc != 0)
104 {
105 if (err)
106 regerror(rc, reg->regex, err->data, err->dsize);
107 mutt_regex_free(&reg);
108 return NULL;
109 }
110
111 return reg;
112}
113
118void mutt_regex_free(struct Regex **ptr)
119{
120 if (!ptr || !*ptr)
121 return;
122
123 struct Regex *rx = *ptr;
124 FREE(&rx->pattern);
125 if (rx->regex)
126 regfree(rx->regex);
127 FREE(&rx->regex);
128 FREE(ptr);
129}
130
140int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
141 struct Buffer *err)
142{
143 if (!rl || !str || (*str == '\0'))
144 return 0;
145
146 struct Regex *rx = mutt_regex_compile(str, flags);
147 if (!rx)
148 {
149 buf_printf(err, "Bad regex: %s\n", str);
150 return -1;
151 }
152
153 /* check to make sure the item is not already on this rl */
154 struct RegexNode *np = NULL;
155 STAILQ_FOREACH(np, rl, entries)
156 {
157 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
158 break; /* already on the rl */
159 }
160
161 if (np)
162 {
163 mutt_regex_free(&rx);
164 }
165 else
166 {
167 np = mutt_regexlist_new();
168 np->regex = rx;
169 STAILQ_INSERT_TAIL(rl, np, entries);
170 }
171
172 return 0;
173}
174
179void mutt_regexlist_free(struct RegexList *rl)
180{
181 if (!rl)
182 return;
183
184 struct RegexNode *np = NULL, *tmp = NULL;
185 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
186 {
187 STAILQ_REMOVE(rl, np, RegexNode, entries);
189 FREE(&np);
190 }
191 STAILQ_INIT(rl);
192}
193
200bool mutt_regexlist_match(struct RegexList *rl, const char *str)
201{
202 if (!rl || !str)
203 return false;
204 struct RegexNode *np = NULL;
205 STAILQ_FOREACH(np, rl, entries)
206 {
207 if (mutt_regex_match(np->regex, str))
208 {
209 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
210 return true;
211 }
212 }
213
214 return false;
215}
216
222{
223 return mutt_mem_calloc(1, sizeof(struct RegexNode));
224}
225
235int mutt_regexlist_remove(struct RegexList *rl, const char *str)
236{
237 if (!rl || !str)
238 return -1;
239
240 if (mutt_str_equal("*", str))
241 {
242 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
243 return 0;
244 }
245
246 int rc = -1;
247 struct RegexNode *np = NULL, *tmp = NULL;
248 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
249 {
250 if (mutt_istr_equal(str, np->regex->pattern))
251 {
252 STAILQ_REMOVE(rl, np, RegexNode, entries);
254 FREE(&np);
255 rc = 0;
256 }
257 }
258
259 return rc;
260}
261
271int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
272 const char *templ, struct Buffer *err)
273{
274 if (!rl || !pat || (*pat == '\0') || !templ)
275 return 0;
276
277 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
278 if (!rx)
279 {
280 buf_printf(err, _("Bad regex: %s"), pat);
281 return -1;
282 }
283
284 /* check to make sure the item is not already on this rl */
285 struct Replace *np = NULL;
286 STAILQ_FOREACH(np, rl, entries)
287 {
288 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
289 {
290 /* Already on the rl. Formerly we just skipped this case, but
291 * now we're supporting removals, which means we're supporting
292 * re-adds conceptually. So we probably want this to imply a
293 * removal, then do an add. We can achieve the removal by freeing
294 * the template, and leaving t pointed at the current item. */
295 FREE(&np->templ);
296 break;
297 }
298 }
299
300 /* If np is set, it's pointing into an extant ReplaceList* that we want to
301 * update. Otherwise we want to make a new one to link at the rl's end. */
302 if (np)
303 {
304 mutt_regex_free(&rx);
305 }
306 else
307 {
309 np->regex = rx;
310 rx = NULL;
311 STAILQ_INSERT_TAIL(rl, np, entries);
312 }
313
314 /* Now np is the Replace that we want to modify. It is prepared. */
315 np->templ = mutt_str_dup(templ);
316
317 /* Find highest match number in template string */
318 np->nmatch = 0;
319 for (const char *p = templ; *p;)
320 {
321 if (*p == '%')
322 {
323 int n = 0;
324 const char *end = mutt_str_atoi(++p, &n);
325 if (!end)
326 {
327 // this is not an error, we might have matched %R or %L in subjectrx
328 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
329 }
330 if (n > np->nmatch)
331 {
332 np->nmatch = n;
333 }
334 if (end)
335 {
336 p = end;
337 }
338 else
339 {
340 p++;
341 }
342 }
343 else
344 {
345 p++;
346 }
347 }
348
349 if (np->nmatch > np->regex->regex->re_nsub)
350 {
351 if (err)
352 buf_addstr(err, _("Not enough subexpressions for template"));
354 return -1;
355 }
356
357 np->nmatch++; /* match 0 is always the whole expr */
358 return 0;
359}
360
374char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
375{
376 static regmatch_t *pmatch = NULL;
377 static size_t nmatch = 0;
378 static char twinbuf[2][1024];
379 int switcher = 0;
380 char *p = NULL;
381 size_t cpysize, tlen;
382 char *src = NULL, *dst = NULL;
383
384 if (buf && (buflen != 0))
385 buf[0] = '\0';
386
387 if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
388 return buf;
389
390 twinbuf[0][0] = '\0';
391 twinbuf[1][0] = '\0';
392 src = twinbuf[switcher];
393 dst = src;
394
395 mutt_str_copy(src, str, sizeof(*twinbuf));
396
397 struct Replace *np = NULL;
398 STAILQ_FOREACH(np, rl, entries)
399 {
400 /* If this pattern needs more matches, expand pmatch. */
401 if (np->nmatch > nmatch)
402 {
403 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
404 nmatch = np->nmatch;
405 }
406
407 if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
408 {
409 tlen = 0;
410 switcher ^= 1;
411 dst = twinbuf[switcher];
412
413 mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
414
415 /* Copy into other twinbuf with substitutions */
416 if (np->templ)
417 {
418 for (p = np->templ; *p && (tlen < (sizeof(*twinbuf) - 1));)
419 {
420 if (*p == '%')
421 {
422 p++;
423 if (*p == 'L')
424 {
425 p++;
426 cpysize = MIN(pmatch[0].rm_so, (sizeof(*twinbuf) - 1) - tlen);
427 strncpy(&dst[tlen], src, cpysize);
428 tlen += cpysize;
429 }
430 else if (*p == 'R')
431 {
432 p++;
433 cpysize = MIN(strlen(src) - pmatch[0].rm_eo, (sizeof(*twinbuf) - 1) - tlen);
434 strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
435 tlen += cpysize;
436 }
437 else
438 {
439 long n = strtoul(p, &p, 10); /* get subst number */
440 if (n < np->nmatch)
441 {
442 while (isdigit((unsigned char) *p)) /* skip subst token */
443 p++;
444 for (int i = pmatch[n].rm_so;
445 (i < pmatch[n].rm_eo) && (tlen < (sizeof(*twinbuf) - 1)); i++)
446 {
447 dst[tlen++] = src[i];
448 }
449 }
450 }
451 }
452 else
453 {
454 dst[tlen++] = *p++;
455 }
456 }
457 }
458 dst[tlen] = '\0';
459 mutt_debug(LL_DEBUG5, "subst %s\n", dst);
460 }
461 src = dst;
462 }
463
464 if (buf)
465 mutt_str_copy(buf, dst, buflen);
466 else
467 buf = mutt_str_dup(dst);
468 return buf;
469}
470
475void mutt_replacelist_free(struct ReplaceList *rl)
476{
477 if (!rl)
478 return;
479
480 struct Replace *np = NULL, *tmp = NULL;
481 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
482 {
483 STAILQ_REMOVE(rl, np, Replace, entries);
485 FREE(&np->templ);
486 FREE(&np);
487 }
488}
489
503bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
504{
505 if (!rl || !buf || !str)
506 return false;
507
508 static regmatch_t *pmatch = NULL;
509 static size_t nmatch = 0;
510 int tlen = 0;
511 char *p = NULL;
512
513 struct Replace *np = NULL;
514 STAILQ_FOREACH(np, rl, entries)
515 {
516 /* If this pattern needs more matches, expand pmatch. */
517 if (np->nmatch > nmatch)
518 {
519 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
520 nmatch = np->nmatch;
521 }
522
523 /* Does this pattern match? */
524 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
525 {
526 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
527 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
528
529 /* Copy template into buf, with substitutions. */
530 for (p = np->templ; *p && (tlen < (buflen - 1));)
531 {
532 /* backreference to pattern match substring, eg. %1, %2, etc) */
533 if (*p == '%')
534 {
535 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
536
537 p++; /* skip over % char */
538 long n = strtol(p, &e, 10);
539 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
540 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
541 * the static array above is always large enough based on that value. */
542 if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
543 {
544 /* copy as much of the substring match as will fit in the output buffer, saving space for
545 * the terminating nul char */
546 for (int idx = pmatch[n].rm_so;
547 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
548 {
549 buf[tlen++] = str[idx];
550 }
551 }
552 p = e; /* skip over the parsed integer */
553 }
554 else
555 {
556 buf[tlen++] = *p++;
557 }
558 }
559 /* tlen should always be less than buflen except when buflen<=0
560 * because the bounds checks in the above code leave room for the
561 * terminal nul char. This should avoid returning an unterminated
562 * string to the caller. When buflen<=0 we make no assumption about
563 * the validity of the buf pointer. */
564 if (tlen < buflen)
565 {
566 buf[tlen] = '\0';
567 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
568 }
569 return true;
570 }
571 }
572
573 return false;
574}
575
581{
582 return mutt_mem_calloc(1, sizeof(struct Replace));
583}
584
591int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
592{
593 if (!rl || !pat)
594 return 0;
595
596 int nremoved = 0;
597 struct Replace *np = NULL, *tmp = NULL;
598 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
599 {
600 if (mutt_str_equal(np->regex->pattern, pat))
601 {
602 STAILQ_REMOVE(rl, np, Replace, entries);
604 FREE(&np->templ);
605 FREE(&np);
606 nremoved++;
607 }
608 }
609
610 return nremoved;
611}
612
622bool mutt_regex_capture(const struct Regex *regex, const char *str,
623 size_t nmatch, regmatch_t matches[])
624{
625 if (!regex || !str || !regex->regex)
626 return false;
627
628 int rc = regexec(regex->regex, str, nmatch, matches, 0);
629 return ((rc == 0) ^ regex->pat_not);
630}
631
/**
 * mutt_regex_match - Shorthand for mutt_regex_capture() without captures
 * @param regex Regex to execute
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() called with no match array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const bool result = mutt_regex_capture(regex, str, 0, NULL);
  return result;
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:188
Parse a number in a string.
int buf_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:178
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:243
General purpose object for storing and parsing strings.
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging2.h:47
@ LL_DEBUG2
Log at debug level 2.
Definition: logging2.h:44
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:354
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:45
#define MIN(a, b)
Definition: memory.h:32
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:591
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:80
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexNode (a single RegexList entry).
Definition: regex.c:221
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:374
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:59
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:179
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:140
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:622
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:475
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:235
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:503
struct Replace * mutt_replacelist_new(void)
Create a new Replace (a single ReplaceList entry).
Definition: regex.c:580
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:271
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:200
void mutt_regex_free(struct Regex **ptr)
Free a Regex object.
Definition: regex.c:118
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:639
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:721
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:709
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:630
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:49
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:36
size_t dsize
Length of data.
Definition: buffer.h:39
char * data
Pointer to data.
Definition: buffer.h:37
List of regular expressions.
Definition: regex3.h:95
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:96
Cached regular expression.
Definition: regex3.h:85
char * pattern
printable version
Definition: regex3.h:86
bool pat_not
do not match
Definition: regex3.h:88
regex_t * regex
compiled expression
Definition: regex3.h:87
List of regular expressions.
Definition: regex3.h:105
char * templ
Template to match.
Definition: regex3.h:108
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition: regex3.h:107
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:106
Constants for all the config types.
#define D_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: types.h:104
#define D_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: types.h:103