NeoMutt  2024-04-25-102-g19653a
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
prex.c
Go to the documentation of this file.
1
30#include "config.h"
31#include <stdbool.h>
32#include <stddef.h>
33#include <stdint.h>
34#include "prex.h"
35#include "logging2.h"
36#include "memory.h"
37#include "signal2.h"
38
39#ifdef HAVE_PCRE2
40#define PCRE2_CODE_UNIT_WIDTH 8
41#include <pcre2.h>
42#include <string.h>
43
48static bool pcre2_has_unicode(void)
49{
50 static uint32_t checked = -1;
51 if (checked == -1)
52 {
53 pcre2_config(PCRE2_CONFIG_UNICODE, &checked);
54 }
55 return checked;
56}
57#endif
58
70{
71 enum Prex which;
72 size_t nmatches;
73 const char *str;
74#ifdef HAVE_PCRE2
75 pcre2_code *re;
76 pcre2_match_data *mdata;
77#else
78 regex_t *re;
79#endif
80 regmatch_t *matches;
81};
82
/* Building blocks shared by the date-related regexes below.
 * Each macro expands to exactly one capture group, so callers can count
 * groups by counting macros. */

/// Three-letter English month abbreviation (case-sensitive)
#define PREX_MONTH "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
/// Month as either the three-letter abbreviation or the full English name
#define PREX_MONTH_LAX \
  "(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)"
/// Three-letter English day of the week (case-sensitive)
#define PREX_DOW "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
/// Three-letter English day of the week, any mix of upper and lower case
#define PREX_DOW_NOCASE \
  "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"
/// Time of day as three two-digit fields: HH:MM:SS
#define PREX_TIME "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"
/// Four-digit year
#define PREX_YEAR "([[:digit:]]{4})"
91
100static struct PrexStorage *prex(enum Prex which)
101{
102 static struct PrexStorage storage[] = {
103 // clang-format off
104 {
105 PREX_URL,
107 /* Spec: https://tools.ietf.org/html/rfc3986#section-3 */
108#ifdef HAVE_PCRE2
109#define UNR_PCTENC_SUBDEL "][\\p{L}\\p{N}._~%!$&'()*+,;="
110#else
111#define UNR_PCTENC_SUBDEL "][[:alnum:]._~%!$&'()*+,;="
112#endif
113#define PATH ":@/ "
114 "^([[:alpha:]][-+.[:alnum:]]+):" // . scheme
115 "(" // . rest
116 "(" // . . authority + path
117 // . . or path only
118 "(//" // . . . authority + path
119 "(" // . . . . user info
120 "([" UNR_PCTENC_SUBDEL "@-]*)" // . . . . . user name + '@'
121 "(:([" UNR_PCTENC_SUBDEL "-]*))?" // . . . . . password
122 "@)?"
123 "(" // . . . . host
124 "([" UNR_PCTENC_SUBDEL "-]*)" // . . . . . host name
125 "|"
126 "(\\[[[:xdigit:]:.]+\\])" // . . . . . IPv4 or IPv6
127 ")"
128 "(:([[:digit:]]+))?" // . . . . port
129 "(/([" UNR_PCTENC_SUBDEL PATH "-]*))?" // . . . . path
130 ")"
131 "|"
132 "(" // . . . path only
133 "[" UNR_PCTENC_SUBDEL PATH "-]*" // . . . . path
134 ")"
135 ")"
136 // Should be: "(\\?([" UNR_PCTENC_SUBDEL PATH "?-]*))?"
137 "(\\?([^#]*))?" // . . query
138 ")$"
139#undef PATH
140#undef UNR_PCTENC_SUBDEL
141 },
142 {
145#define QUERY_PART "^&=" // Should be: "-[:alnum:]._~%!$'()*+,;:@/"
146 "([" QUERY_PART "]+)=([" QUERY_PART "]+)" // query + ' '
147#undef QUERY_PART
148 },
149 {
152 "=\\?"
153 "([^][()<>@,;:\\\"/?. =]+)" // charset
154 "\\?"
155 "([qQbB])" // encoding
156 "\\?"
157 "([^?]+)" // encoded text - we accept whitespace, see #1189
158 "\\?="
159 },
160 {
163 "^\\#H ([[:alnum:]_\\.-]+) ([[:alnum:]]{4}( [[:alnum:]]{4}){7})[ \t]*$"
164 },
165 {
168 /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
169#define FWS " *"
170#define C "(\\(.*\\))?"
171#define CFWS FWS C FWS
172 "^"
173 CFWS
174 "(([[:alpha:]]+)" CFWS ", *)?" // Day of week (or whatever)
175 CFWS "([[:digit:]]{1,2}) " // Day
176 CFWS PREX_MONTH_LAX // Month
177 CFWS "([[:digit:]]{2,4}) " // Year
178 CFWS "([[:digit:]]{1,2})" // Hour
179 ":" CFWS "([[:digit:]]{1,2})" // Minute
180 CFWS
181 "(:" CFWS "([[:digit:]]{1,2}))?" // Second
182 CFWS
183 "("
184 "([+-][[:digit:]]{4})|" // TZ
185 "([[:alpha:]]+)" // Obsolete TZ
186 ")?"
187#undef CFWS
188#undef C
189#undef FWS
190 },
191 {
194 "( ([[:digit:]])|([[:digit:]]{2}))" // Day
195 "-" PREX_MONTH // Month
196 "-" PREX_YEAR // Year
197 " " PREX_TIME // Time
198 " ([+-][[:digit:]]{4})" // TZ
199 },
200 {
203 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
204 "^From " // From
205 "([^[:space:]]+) +" // Sender
206 PREX_DOW // Day of week
207 " +"
208 PREX_MONTH // Month
209 " ( ([[:digit:]])|([[:digit:]]{2}))" // Day
210 " +"
211 PREX_TIME // Time
212 " +"
213 PREX_YEAR // Year
214 },
215 {
218 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
219 "^From " // From
220 "("
221 "[^[:space:]]+" // Sender
222 "( at [^[:space:]]+)?" // Possibly obfuscated, pipermail-style
223 ")?"
224 " *"
225 PREX_DOW_NOCASE // Day of week
226 " +"
227 PREX_MONTH // Month
228 " +"
229 "( " // Day
230 "([[:digit:]])|"
231 "([[:digit:]]{2})"
232 ")"
233 " +"
234 "("
235 PREX_TIME // Time (HH:MM:SS)
236 "|"
237 "([[:digit:]]{2}" // Time (HH:MM)
238 ":[[:digit:]]{2})"
239 ")"
240 " +"
241 "("
242 "([[:alpha:] ]+)|" // Timezone name (which we skip)
243 "([+][[:digit:]]{4} )" // Timezone offset (which we skip)
244 ")?"
245 "("
246 PREX_YEAR // Year (YYYY)
247 "|"
248 "([[:digit:]]{2})" // Year (YY)
249 ")"
250 },
251 {
254 "^([[:alpha:]]+): (.*)$"
255 },
256 {
259 "^(.*)(tags:)([[:alnum:],]*) ?(.*)$"
260 },
261 // clang-format on
262 };
263
264 ASSERT((which < PREX_MAX) && "Invalid 'which' argument");
265 struct PrexStorage *h = &storage[which];
266 ASSERT((which == h->which) && "Fix 'storage' array");
267 if (!h->re)
268 {
269#ifdef HAVE_PCRE2
270 uint32_t opt = pcre2_has_unicode() ? PCRE2_UTF : 0;
271 int eno = 0;
272 PCRE2_SIZE eoff = 0;
273 h->re = pcre2_compile((PCRE2_SPTR8) h->str, PCRE2_ZERO_TERMINATED, opt,
274 &eno, &eoff, NULL);
275 ASSERT(h->re && "Fix your RE");
276 h->mdata = pcre2_match_data_create_from_pattern(h->re, NULL);
277 uint32_t ccount = 0;
278 pcre2_pattern_info(h->re, PCRE2_INFO_CAPTURECOUNT, &ccount);
279 ASSERT(((ccount + 1) == h->nmatches) && "Number of matches do not match (...)");
280 h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
281#else
282 h->re = mutt_mem_calloc(1, sizeof(*h->re));
283 const int rc = regcomp(h->re, h->str, REG_EXTENDED);
284 ASSERT(rc == 0 && "Fix your RE");
285 h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
286#endif
287 }
288 return h;
289}
290
298regmatch_t *mutt_prex_capture(enum Prex which, const char *str)
299{
300 if (!str)
301 return NULL;
302
303 struct PrexStorage *h = prex(which);
304#ifdef HAVE_PCRE2
305 size_t len = strlen(str);
306 int rc = pcre2_match(h->re, (PCRE2_SPTR8) str, len, 0, 0, h->mdata, NULL);
307 if (rc < 0)
308 {
309 PCRE2_UCHAR errmsg[1024];
310 pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
311 mutt_debug(LL_DEBUG2, "pcre2_match - <%s> -> <%s> = %s\n", h->str, str, errmsg);
312 return NULL;
313 }
314 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(h->mdata);
315 int i = 0;
316 for (; i < rc; i++)
317 {
318 h->matches[i].rm_so = ovector[i * 2];
319 h->matches[i].rm_eo = ovector[i * 2 + 1];
320 }
321 for (; i < h->nmatches; i++)
322 {
323 h->matches[i].rm_so = -1;
324 h->matches[i].rm_eo = -1;
325 }
326#else
327 if (regexec(h->re, str, h->nmatches, h->matches, 0))
328 return NULL;
329
330 ASSERT((h->re->re_nsub == (h->nmatches - 1)) &&
331 "Regular expression and matches enum are out of sync");
332#endif
333 return h->matches;
334}
335
340{
341 for (enum Prex which = 0; which < PREX_MAX; which++)
342 {
343 struct PrexStorage *h = prex(which);
344#ifdef HAVE_PCRE2
345 pcre2_match_data_free(h->mdata);
346 pcre2_code_free(h->re);
347#else
348 regfree(h->re);
349 FREE(&h->re);
350#endif
351 FREE(&h->matches);
352 }
353}
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
Logging Dispatcher.
@ LL_DEBUG2
Log at debug level 2.
Definition: logging2.h:44
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:51
Memory management wrappers.
#define FREE(x)
Definition: memory.h:45
#define PREX_DOW_NOCASE
Definition: prex.c:87
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:298
#define CFWS
#define PREX_MONTH_LAX
Definition: prex.c:84
#define PREX_DOW
Definition: prex.c:86
#define PREX_TIME
Definition: prex.c:89
#define QUERY_PART
#define PREX_MONTH
Definition: prex.c:83
static struct PrexStorage * prex(enum Prex which)
Compile on demand and get data for a predefined regex.
Definition: prex.c:100
#define PATH
#define PREX_YEAR
Definition: prex.c:90
void mutt_prex_cleanup(void)
Cleanup heap memory allocated by compiled regexes.
Definition: prex.c:339
#define UNR_PCTENC_SUBDEL
Manage precompiled / predefined regular expressions.
@ PREX_MBOX_FROM_LAX_MATCH_MAX
Definition: prex.h:216
@ PREX_ACCOUNT_CMD_MATCH_MAX
Definition: prex.h:227
@ PREX_IMAP_DATE_MATCH_MAX
Definition: prex.h:170
@ PREX_MBOX_FROM_MATCH_MAX
Definition: prex.h:189
@ PREX_RFC2047_ENCODED_WORD_MATCH_MAX
Definition: prex.h:100
@ PREX_URL_QUERY_KEY_VAL_MATCH_MAX
Definition: prex.h:86
Prex
Predefined list of regular expressions.
Definition: prex.h:33
@ PREX_GNUTLS_CERT_HOST_HASH
[#H foo.com A76D 954B EB79 1F49 5B3A 0A0E 0681 65B1]
Definition: prex.h:37
@ PREX_MBOX_FROM_LAX
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:41
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:34
@ PREX_MBOX_FROM
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:40
@ PREX_ACCOUNT_CMD
key: value
Definition: prex.h:42
@ PREX_ALIAS_TAGS
tags:a,b,c
Definition: prex.h:43
@ PREX_IMAP_DATE
[16-MAR-2020 15:09:35 -0700]
Definition: prex.h:39
@ PREX_RFC5322_DATE_LAX
[Mon, (Comment) 16 Mar 2020 15:09:35 -0700]
Definition: prex.h:38
@ PREX_URL_QUERY_KEY_VAL
https://example.com/?[q=foo]
Definition: prex.h:35
@ PREX_MAX
Definition: prex.h:44
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:36
@ PREX_RFC5322_DATE_LAX_MATCH_MAX
Definition: prex.h:152
@ PREX_URL_MATCH_MAX
Definition: prex.h:73
@ PREX_GNUTLS_CERT_HOST_HASH_MATCH_MAX
Definition: prex.h:114
@ PREX_ALIAS_TAGS_MATCH_MAX
Definition: prex.h:241
Signal handling.
#define ASSERT(COND)
Definition: signal2.h:58
A predefined / precompiled regex.
Definition: prex.c:70
const char * str
Regex string.
Definition: prex.c:73
enum Prex which
Regex type, e.g. PREX_URL.
Definition: prex.c:71
size_t nmatches
Number of regex matches.
Definition: prex.c:72
regex_t * re
Compiled regex.
Definition: prex.c:78
regmatch_t * matches
Resulting matches.
Definition: prex.c:80