NeoMutt  2023-11-03-85-g512e01
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1
30#include "config.h"
31#include <ctype.h>
32#include <regex.h>
33#include <stdbool.h>
34#include <stdint.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include "atoi.h"
39#include "buffer.h"
40#include "logging2.h"
41#include "mbyte.h"
42#include "memory.h"
43#include "message.h"
44#include "queue.h"
45#include "regex3.h"
46#include "string2.h"
47
55struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
56{
57 if (!str || (*str == '\0'))
58 return NULL;
59 struct Regex *rx = mutt_mem_calloc(1, sizeof(struct Regex));
60 rx->pattern = mutt_str_dup(str);
61 rx->regex = mutt_mem_calloc(1, sizeof(regex_t));
62 if (REG_COMP(rx->regex, str, flags) != 0)
63 mutt_regex_free(&rx);
64
65 return rx;
66}
67
76struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
77{
78 if (!str || (*str == '\0'))
79 return NULL;
80
81 uint16_t rflags = 0;
82 struct Regex *reg = mutt_mem_calloc(1, sizeof(struct Regex));
83
84 reg->regex = mutt_mem_calloc(1, sizeof(regex_t));
85 reg->pattern = mutt_str_dup(str);
86
87 /* Should we use smart case matching? */
88 if (((flags & DT_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
89 rflags |= REG_ICASE;
90
91 /* Is a prefix of '!' allowed? */
92 if (((flags & DT_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
93 {
94 reg->pat_not = true;
95 str++;
96 }
97
98 int rc = REG_COMP(reg->regex, str, rflags);
99 if (rc != 0)
100 {
101 if (err)
102 regerror(rc, reg->regex, err->data, err->dsize);
103 mutt_regex_free(&reg);
104 return NULL;
105 }
106
107 return reg;
108}
109
114void mutt_regex_free(struct Regex **ptr)
115{
116 if (!ptr || !*ptr)
117 return;
118
119 struct Regex *rx = *ptr;
120 FREE(&rx->pattern);
121 if (rx->regex)
122 regfree(rx->regex);
123 FREE(&rx->regex);
124 FREE(ptr);
125}
126
136int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
137 struct Buffer *err)
138{
139 if (!rl || !str || (*str == '\0'))
140 return 0;
141
142 struct Regex *rx = mutt_regex_compile(str, flags);
143 if (!rx)
144 {
145 buf_printf(err, "Bad regex: %s\n", str);
146 return -1;
147 }
148
149 /* check to make sure the item is not already on this rl */
150 struct RegexNode *np = NULL;
151 STAILQ_FOREACH(np, rl, entries)
152 {
153 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
154 break; /* already on the rl */
155 }
156
157 if (np)
158 {
159 mutt_regex_free(&rx);
160 }
161 else
162 {
163 np = mutt_regexlist_new();
164 np->regex = rx;
165 STAILQ_INSERT_TAIL(rl, np, entries);
166 }
167
168 return 0;
169}
170
175void mutt_regexlist_free(struct RegexList *rl)
176{
177 if (!rl)
178 return;
179
180 struct RegexNode *np = NULL, *tmp = NULL;
181 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
182 {
183 STAILQ_REMOVE(rl, np, RegexNode, entries);
185 FREE(&np);
186 }
187 STAILQ_INIT(rl);
188}
189
196bool mutt_regexlist_match(struct RegexList *rl, const char *str)
197{
198 if (!rl || !str)
199 return false;
200 struct RegexNode *np = NULL;
201 STAILQ_FOREACH(np, rl, entries)
202 {
203 if (mutt_regex_match(np->regex, str))
204 {
205 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
206 return true;
207 }
208 }
209
210 return false;
211}
212
218{
219 return mutt_mem_calloc(1, sizeof(struct RegexNode));
220}
221
231int mutt_regexlist_remove(struct RegexList *rl, const char *str)
232{
233 if (!rl || !str)
234 return -1;
235
236 if (mutt_str_equal("*", str))
237 {
238 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
239 return 0;
240 }
241
242 int rc = -1;
243 struct RegexNode *np = NULL, *tmp = NULL;
244 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
245 {
246 if (mutt_istr_equal(str, np->regex->pattern))
247 {
248 STAILQ_REMOVE(rl, np, RegexNode, entries);
250 FREE(&np);
251 rc = 0;
252 }
253 }
254
255 return rc;
256}
257
267int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
268 const char *templ, struct Buffer *err)
269{
270 if (!rl || !pat || (*pat == '\0') || !templ)
271 return 0;
272
273 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
274 if (!rx)
275 {
276 if (err)
277 buf_printf(err, _("Bad regex: %s"), pat);
278 return -1;
279 }
280
281 /* check to make sure the item is not already on this rl */
282 struct Replace *np = NULL;
283 STAILQ_FOREACH(np, rl, entries)
284 {
285 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
286 {
287 /* Already on the rl. Formerly we just skipped this case, but
288 * now we're supporting removals, which means we're supporting
289 * re-adds conceptually. So we probably want this to imply a
290 * removal, then do an add. We can achieve the removal by freeing
291 * the template, and leaving t pointed at the current item. */
292 FREE(&np->templ);
293 break;
294 }
295 }
296
297 /* If np is set, it's pointing into an extant ReplaceList* that we want to
298 * update. Otherwise we want to make a new one to link at the rl's end. */
299 if (np)
300 {
301 mutt_regex_free(&rx);
302 }
303 else
304 {
306 np->regex = rx;
307 rx = NULL;
308 STAILQ_INSERT_TAIL(rl, np, entries);
309 }
310
311 /* Now np is the Replace that we want to modify. It is prepared. */
312 np->templ = mutt_str_dup(templ);
313
314 /* Find highest match number in template string */
315 np->nmatch = 0;
316 for (const char *p = templ; *p;)
317 {
318 if (*p == '%')
319 {
320 int n = 0;
321 const char *end = mutt_str_atoi(++p, &n);
322 if (!end)
323 {
324 // this is not an error, we might have matched %R or %L in subjectrx
325 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
326 }
327 if (n > np->nmatch)
328 {
329 np->nmatch = n;
330 }
331 if (end)
332 {
333 p = end;
334 }
335 else
336 {
337 p++;
338 }
339 }
340 else
341 {
342 p++;
343 }
344 }
345
346 if (np->nmatch > np->regex->regex->re_nsub)
347 {
348 if (err)
349 buf_addstr(err, _("Not enough subexpressions for template"));
351 return -1;
352 }
353
354 np->nmatch++; /* match 0 is always the whole expr */
355 return 0;
356}
357
371char *mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
372{
373 static regmatch_t *pmatch = NULL;
374 static size_t nmatch = 0;
375 static char twinbuf[2][1024];
376 int switcher = 0;
377 char *p = NULL;
378 size_t cpysize, tlen;
379 char *src = NULL, *dst = NULL;
380
381 if (buf && (buflen != 0))
382 buf[0] = '\0';
383
384 if (!rl || !str || (*str == '\0') || (buf && (buflen == 0)))
385 return buf;
386
387 twinbuf[0][0] = '\0';
388 twinbuf[1][0] = '\0';
389 src = twinbuf[switcher];
390 dst = src;
391
392 mutt_str_copy(src, str, sizeof(*twinbuf));
393
394 struct Replace *np = NULL;
395 STAILQ_FOREACH(np, rl, entries)
396 {
397 /* If this pattern needs more matches, expand pmatch. */
398 if (np->nmatch > nmatch)
399 {
400 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
401 nmatch = np->nmatch;
402 }
403
404 if (mutt_regex_capture(np->regex, src, np->nmatch, pmatch))
405 {
406 tlen = 0;
407 switcher ^= 1;
408 dst = twinbuf[switcher];
409
410 mutt_debug(LL_DEBUG5, "%s matches %s\n", src, np->regex->pattern);
411
412 /* Copy into other twinbuf with substitutions */
413 if (np->templ)
414 {
415 for (p = np->templ; *p && (tlen < (sizeof(*twinbuf) - 1));)
416 {
417 if (*p == '%')
418 {
419 p++;
420 if (*p == 'L')
421 {
422 p++;
423 cpysize = MIN(pmatch[0].rm_so, (sizeof(*twinbuf) - 1) - tlen);
424 strncpy(&dst[tlen], src, cpysize);
425 tlen += cpysize;
426 }
427 else if (*p == 'R')
428 {
429 p++;
430 cpysize = MIN(strlen(src) - pmatch[0].rm_eo, (sizeof(*twinbuf) - 1) - tlen);
431 strncpy(&dst[tlen], &src[pmatch[0].rm_eo], cpysize);
432 tlen += cpysize;
433 }
434 else
435 {
436 long n = strtoul(p, &p, 10); /* get subst number */
437 if (n < np->nmatch)
438 {
439 while (isdigit((unsigned char) *p)) /* skip subst token */
440 p++;
441 for (int i = pmatch[n].rm_so;
442 (i < pmatch[n].rm_eo) && (tlen < (sizeof(*twinbuf) - 1)); i++)
443 {
444 dst[tlen++] = src[i];
445 }
446 }
447 }
448 }
449 else
450 {
451 dst[tlen++] = *p++;
452 }
453 }
454 }
455 dst[tlen] = '\0';
456 mutt_debug(LL_DEBUG5, "subst %s\n", dst);
457 }
458 src = dst;
459 }
460
461 if (buf)
462 mutt_str_copy(buf, dst, buflen);
463 else
464 buf = mutt_str_dup(dst);
465 return buf;
466}
467
472void mutt_replacelist_free(struct ReplaceList *rl)
473{
474 if (!rl)
475 return;
476
477 struct Replace *np = NULL, *tmp = NULL;
478 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
479 {
480 STAILQ_REMOVE(rl, np, Replace, entries);
482 FREE(&np->templ);
483 FREE(&np);
484 }
485}
486
500bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
501{
502 if (!rl || !buf || !str)
503 return false;
504
505 static regmatch_t *pmatch = NULL;
506 static size_t nmatch = 0;
507 int tlen = 0;
508 char *p = NULL;
509
510 struct Replace *np = NULL;
511 STAILQ_FOREACH(np, rl, entries)
512 {
513 /* If this pattern needs more matches, expand pmatch. */
514 if (np->nmatch > nmatch)
515 {
516 mutt_mem_realloc(&pmatch, np->nmatch * sizeof(regmatch_t));
517 nmatch = np->nmatch;
518 }
519
520 /* Does this pattern match? */
521 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
522 {
523 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
524 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
525
526 /* Copy template into buf, with substitutions. */
527 for (p = np->templ; *p && (tlen < (buflen - 1));)
528 {
529 /* backreference to pattern match substring, eg. %1, %2, etc) */
530 if (*p == '%')
531 {
532 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
533
534 p++; /* skip over % char */
535 long n = strtol(p, &e, 10);
536 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
537 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
538 * the static array above is always large enough based on that value. */
539 if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
540 {
541 /* copy as much of the substring match as will fit in the output buffer, saving space for
542 * the terminating nul char */
543 for (int idx = pmatch[n].rm_so;
544 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
545 {
546 buf[tlen++] = str[idx];
547 }
548 }
549 p = e; /* skip over the parsed integer */
550 }
551 else
552 {
553 buf[tlen++] = *p++;
554 }
555 }
556 /* tlen should always be less than buflen except when buflen<=0
557 * because the bounds checks in the above code leave room for the
558 * terminal nul char. This should avoid returning an unterminated
559 * string to the caller. When buflen<=0 we make no assumption about
560 * the validity of the buf pointer. */
561 if (tlen < buflen)
562 {
563 buf[tlen] = '\0';
564 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
565 }
566 return true;
567 }
568 }
569
570 return false;
571}
572
578{
579 return mutt_mem_calloc(1, sizeof(struct Replace));
580}
581
588int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
589{
590 if (!rl || !pat)
591 return 0;
592
593 int nremoved = 0;
594 struct Replace *np = NULL, *tmp = NULL;
595 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
596 {
597 if (mutt_str_equal(np->regex->pattern, pat))
598 {
599 STAILQ_REMOVE(rl, np, Replace, entries);
601 FREE(&np->templ);
602 FREE(&np);
603 nremoved++;
604 }
605 }
606
607 return nremoved;
608}
609
619bool mutt_regex_capture(const struct Regex *regex, const char *str,
620 size_t nmatch, regmatch_t matches[])
621{
622 if (!regex || !str || !regex->regex)
623 return false;
624
625 int rc = regexec(regex->regex, str, nmatch, matches, 0);
626 return ((rc == 0) ^ regex->pat_not);
627}
628
/**
 * mutt_regex_match - Test a string against a Regex without collecting captures
 * @param regex Regex to run
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() called with no capture slots
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  const bool matched = mutt_regex_capture(regex, str, 0, NULL);
  return matched;
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: atoi.c:187
Parse a number in a string.
int buf_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:173
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:238
General purpose object for storing and parsing strings.
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition: logging2.h:47
@ LL_DEBUG2
Log at debug level 2.
Definition: logging2.h:44
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:353
Multi-byte String manipulation functions.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:45
#define MIN(a, b)
Definition: memory.h:32
Message logging.
#define _(a)
Definition: message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition: regex.c:588
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create a Regex from a string.
Definition: regex.c:76
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexNode (a single entry for a RegexList).
Definition: regex.c:217
char * mutt_replacelist_apply(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Apply replacements to a buffer.
Definition: regex.c:371
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create a Regex from a string.
Definition: regex.c:55
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition: regex.c:175
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition: regex.c:136
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition: regex.c:619
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition: regex.c:472
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition: regex.c:231
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition: regex.c:500
struct Replace * mutt_replacelist_new(void)
Create a new Replace (a single entry for a ReplaceList).
Definition: regex.c:577
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition: regex.c:267
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition: regex.c:196
void mutt_regex_free(struct Regex **ptr)
Free a Regex object.
Definition: regex.c:114
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:636
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:810
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:251
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition: string.c:798
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:653
#define STAILQ_REMOVE(head, elm, type, field)
Definition: queue.h:402
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:362
Manage regular expressions.
#define DT_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition: regex3.h:36
#define DT_REGEX_MATCH_CASE
Case-sensitive matching.
Definition: regex3.h:35
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
List of regular expressions.
Definition: regex3.h:99
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:100
Cached regular expression.
Definition: regex3.h:89
char * pattern
printable version
Definition: regex3.h:90
bool pat_not
do not match
Definition: regex3.h:92
regex_t * regex
compiled expression
Definition: regex3.h:91
List of regular expressions.
Definition: regex3.h:109
char * templ
Template to match.
Definition: regex3.h:112
size_t nmatch
Number of capture slots to request: highest match number used in the template, plus one (slot 0 is the whole expression)
Definition: regex3.h:111
struct Regex * regex
Regex containing a regular expression.
Definition: regex3.h:110