NeoMutt  2022-04-29-323-g5fcc6c
Teaching an old dog new tricks
DOXYGEN
prex.c File Reference

Manage precompiled / predefined regular expressions. More...

#include "config.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "prex.h"
#include "logging.h"
#include "memory.h"
+ Include dependency graph for prex.c:

Go to the source code of this file.

Data Structures

struct  PrexStorage
 A predefined / precompiled regex. More...
 

Macros

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
 
#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
 
#define PREX_DOW_NOCASE    "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"
 
#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"
 
#define PREX_YEAR   "([[:digit:]]{4})"
 
#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="
 
#define PATH   ":@/ "
 
#define QUERY_PART   "^&="
 
#define FWS   " *"
 
#define C   "(\\(.*\\))?"
 
#define CFWS   FWS C FWS
 

Functions

static struct PrexStorage * prex (enum Prex which)
 Compile on demand and get data for a predefined regex. More...
 
regmatch_t * mutt_prex_capture (enum Prex which, const char *str)
 Match a precompiled regex against a string. More...
 
void mutt_prex_free (void)
 Cleanup heap memory allocated by compiled regexes. More...
 

Detailed Description

Manage precompiled / predefined regular expressions.

Authors
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file prex.c.

Macro Definition Documentation

◆ PREX_MONTH

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"

Definition at line 81 of file prex.c.

◆ PREX_DOW

#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"

Definition at line 82 of file prex.c.

◆ PREX_DOW_NOCASE

#define PREX_DOW_NOCASE    "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"

Definition at line 83 of file prex.c.

◆ PREX_TIME

#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"

Definition at line 85 of file prex.c.

◆ PREX_YEAR

#define PREX_YEAR   "([[:digit:]]{4})"

Definition at line 86 of file prex.c.

◆ UNR_PCTENC_SUBDEL

#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="

◆ PATH

#define PATH   ":@/ "

◆ QUERY_PART

#define QUERY_PART   "^&="

◆ FWS

#define FWS   " *"

◆ C

#define C   "(\\(.*\\))?"

◆ CFWS

#define CFWS   FWS C FWS

Function Documentation

◆ prex()

static struct PrexStorage * prex ( enum Prex  which)
static

Compile on demand and get data for a predefined regex.

Parameters
which   Which regex to get
Return values
ptr     Pointer to a PrexStorage struct
Note
Returned pointer is guaranteed not to be NULL. The function asserts on error.

Definition at line 96 of file prex.c.

/**
 * prex - Compile on demand and get data for a predefined regex
 * @param which Which regex to get
 * @retval ptr Pointer to a PrexStorage struct
 *
 * @note Returned pointer is guaranteed not to be NULL.
 *       The function asserts on error.
 *
 * The regexes live in a function-local static table that is indexed directly
 * by `which`.  An entry is compiled the first time it is requested (while its
 * `re` member is still NULL); the compiled form stays in the table afterwards.
 *
 * NOTE(review): the numbered listing below skips some source lines (e.g. 102,
 * 139-140, 146-147), so the leading members of most table entries are not
 * shown here -- consult the original file before editing the table.
 */
97{
98 static struct PrexStorage storage[] = {
99 // clang-format off
100 {
101 PREX_URL,
103 /* Spec: https://tools.ietf.org/html/rfc3986#section-3 */
104#ifdef HAVE_PCRE2
105#define UNR_PCTENC_SUBDEL "][\\p{L}\\p{N}._~%!$&'()*+,;="
106#else
107#define UNR_PCTENC_SUBDEL "][[:alnum:]._~%!$&'()*+,;="
108#endif
109#define PATH ":@/ "
110 "^([[:alpha:]][-+.[:alnum:]]+):" // . scheme
111 "(" // . rest
112 "(" // . . authority + path
113 // . . or path only
114 "(//" // . . . authority + path
115 "(" // . . . . user info
116 "([" UNR_PCTENC_SUBDEL "@-]*)" // . . . . . user name + '@'
117 "(:([" UNR_PCTENC_SUBDEL "-]*))?" // . . . . . password
118 "@)?"
119 "(" // . . . . host
120 "([" UNR_PCTENC_SUBDEL "-]*)" // . . . . . host name
121 "|"
122 "(\\[[[:xdigit:]:.]+\\])" // . . . . . IPv4 or IPv6
123 ")"
124 "(:([[:digit:]]+))?" // . . . . port
125 "(/([" UNR_PCTENC_SUBDEL PATH "-]*))?" // . . . . path
126 ")"
127 "|"
128 "(" // . . . path only
129 "[" UNR_PCTENC_SUBDEL PATH "-]*" // . . . . path
130 ")"
131 ")"
132 // Should be: "(\\?([" UNR_PCTENC_SUBDEL PATH "?-]*))?"
133 "(\\?([^#]*))?" // . . query
134 ")$"
135#undef PATH
136#undef UNR_PCTENC_SUBDEL
137 },
138 {
141#define QUERY_PART "^&=" // Should be: "-[:alnum:]._~%!$'()*+,;:@/"
142 "([" QUERY_PART "]+)=([" QUERY_PART "]+)" // query + ' '
143#undef QUERY_PART
144 },
145 {
148 "=\\?"
149 "([^][()<>@,;:\\\"/?. =]+)" // charset
150 "\\?"
151 "([qQbB])" // encoding
152 "\\?"
153 "([^?]+)" // encoded text - we accept whitespace, see #1189
154 "\\?="
155 },
156 {
159 "^\\#H ([[:alnum:]_\\.-]+) ([[:alnum:]]{4}( [[:alnum:]]{4}){7})[ \t]*$"
160 },
161 {
164 /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
165 "^"
166 "(" PREX_DOW ", )?" // Day of week
167 " *"
168 "([[:digit:]]{1,2}) " // Day
169 PREX_MONTH // Month
170 " ([[:digit:]]{2,4}) " // Year
171 "([[:digit:]]{2})" // Hour
172 ":([[:digit:]]{2})" // Minute
173 "(:([[:digit:]]{2}))?" // Second
174 " *"
175 "("
176 "([+-][[:digit:]]{4})|" // TZ
177 "([[:alpha:]]+)" // Obsolete TZ
178 ")"
179 },
180 {
183 /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
184#define FWS " *"
185#define C "(\\(.*\\))?"
186#define CFWS FWS C FWS
187 "^"
188 CFWS
189 "(([[:alpha:]]+)" CFWS ", *)?" // Day of week (or whatever)
190 CFWS "([[:digit:]]{1,2}) " // Day
191 CFWS PREX_MONTH // Month
192 CFWS "([[:digit:]]{2,4}) " // Year
193 CFWS "([[:digit:]]{1,2})" // Hour
194 ":" CFWS "([[:digit:]]{1,2})" // Minute
195 CFWS
196 "(:" CFWS "([[:digit:]]{1,2}))?" // Second
197 CFWS
198 "("
199 "([+-][[:digit:]]{4})|" // TZ
200 "([[:alpha:]]+)" // Obsolete TZ
201 ")?"
202#undef CFWS
203#undef C
204#undef FWS
205 },
206 {
209 "( ([[:digit:]])|([[:digit:]]{2}))" // Day
210 "-" PREX_MONTH // Month
211 "-" PREX_YEAR // Year
212 " " PREX_TIME // Time
213 " ([+-][[:digit:]]{4})" // TZ
214 },
215 {
218 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
219 "^From " // From
220 "([^[:space:]]+) +" // Sender
221 PREX_DOW // Day of week
222 " +"
223 PREX_MONTH // Month
224 " ( ([[:digit:]])|([[:digit:]]{2}))" // Day
225 " +"
226 PREX_TIME // Time
227 " +"
228 PREX_YEAR // Year
229 },
230 {
233 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
234 "^From " // From
235 "("
236 "[^[:space:]]+" // Sender
237 "( at [^[:space:]]+)?" // Possibly obfuscated, pipermail-style
238 ")?"
239 " *"
240 PREX_DOW_NOCASE // Day of week
241 " +"
242 PREX_MONTH // Month
243 " +"
244 "( " // Day
245 "([[:digit:]])|"
246 "([[:digit:]]{2})"
247 ")"
248 " +"
249 "("
250 PREX_TIME // Time (HH:MM:SS)
251 "|"
252 "([[:digit:]]{2}" // Time (HH:MM)
253 ":[[:digit:]]{2})"
254 ")"
255 " +"
256 "("
257 "([[:alpha:] ]+)|" // Timezone name (which we skip)
258 "([+][[:digit:]]{4} )" // Timezone offset (which we skip)
259 ")?"
260 "("
261 PREX_YEAR // Year (YYYY)
262 "|"
263 "([[:digit:]]{2})" // Year (YY)
264 ")"
265 },
266 {
269 "^([[:alpha:]]+): (.*)$"
270 },
271 // clang-format on
272 };
273
/* The table is indexed by 'which': check the index is in range, then check
 * that the entry found really is the one that was asked for. */
274 assert((which < PREX_MAX) && "Invalid 'which' argument");
275 struct PrexStorage *h = &storage[which];
276 assert((which == h->which) && "Fix 'storage' array");
/* First request for this regex: compile it and allocate its capture array */
277 if (!h->re)
278 {
279#ifdef HAVE_PCRE2
/* Request UTF mode only when pcre2_has_unicode() returns true */
280 uint32_t opt = pcre2_has_unicode() ? PCRE2_UTF : 0;
281 int eno = 0;
282 PCRE2_SIZE eoff = 0;
283 h->re = pcre2_compile((PCRE2_SPTR8) h->str, PCRE2_ZERO_TERMINATED, opt,
284 &eno, &eoff, NULL);
285 assert(h->re && "Fix your RE");
286 h->mdata = pcre2_match_data_create_from_pattern(h->re, NULL);
/* The pattern's capture count plus one must equal the entry's nmatches */
287 uint32_t ccount = 0;
288 pcre2_pattern_info(h->re, PCRE2_INFO_CAPTURECOUNT, &ccount);
289 assert(ccount + 1 == h->nmatches && "Number of matches do not match (...)");
290 h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
291#else
/* Without PCRE2: allocate the regex_t ourselves and compile with REG_EXTENDED */
292 h->re = mutt_mem_calloc(1, sizeof(*h->re));
293 const int rc = regcomp(h->re, h->str, REG_EXTENDED);
294 assert(rc == 0 && "Fix your RE");
295 h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
296#endif
297 }
298 return h;
299}
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define PREX_DOW_NOCASE
Definition: prex.c:83
#define CFWS
#define PREX_DOW
Definition: prex.c:82
#define PREX_TIME
Definition: prex.c:85
#define QUERY_PART
#define PREX_MONTH
Definition: prex.c:81
#define PATH
#define PREX_YEAR
Definition: prex.c:86
#define UNR_PCTENC_SUBDEL
@ PREX_MBOX_FROM_LAX_MATCH_MAX
Definition: prex.h:238
@ PREX_ACCOUNT_CMD_MATCH_MAX
Definition: prex.h:249
@ PREX_IMAP_DATE_MATCH_MAX
Definition: prex.h:192
@ PREX_RFC5322_DATE_MATCH_MAX
Definition: prex.h:136
@ PREX_MBOX_FROM_MATCH_MAX
Definition: prex.h:211
@ PREX_RFC2047_ENCODED_WORD_MATCH_MAX
Definition: prex.h:99
@ PREX_URL_QUERY_KEY_VAL_MATCH_MAX
Definition: prex.h:85
@ PREX_GNUTLS_CERT_HOST_HASH
[#H foo.com A76D 954B EB79 1F49 5B3A 0A0E 0681 65B1]
Definition: prex.h:36
@ PREX_MBOX_FROM_LAX
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:41
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
@ PREX_MBOX_FROM
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:40
@ PREX_ACCOUNT_CMD
key: value
Definition: prex.h:42
@ PREX_IMAP_DATE
[16-MAR-2020 15:09:35 -0700]
Definition: prex.h:39
@ PREX_RFC5322_DATE
[Mon, 16 Mar 2020 15:09:35 -0700]
Definition: prex.h:37
@ PREX_RFC5322_DATE_LAX
[Mon, (Comment) 16 Mar 2020 15:09:35 -0700]
Definition: prex.h:38
@ PREX_URL_QUERY_KEY_VAL
https://example.com/?[q=foo]
Definition: prex.h:34
@ PREX_MAX
Definition: prex.h:43
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
@ PREX_RFC5322_DATE_LAX_MATCH_MAX
Definition: prex.h:174
@ PREX_URL_MATCH_MAX
Definition: prex.h:72
@ PREX_GNUTLS_CERT_HOST_HASH_MATCH_MAX
Definition: prex.h:113
A predefined / precompiled regex.
Definition: prex.c:68
const char * str
Regex string.
Definition: prex.c:71
enum Prex which
Regex type, e.g. PREX_URL.
Definition: prex.c:69
size_t nmatches
Number of regex matches.
Definition: prex.c:70
regex_t * re
Compiled regex.
Definition: prex.c:76
regmatch_t * matches
Resulting matches.
Definition: prex.c:78
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_prex_capture()

regmatch_t * mutt_prex_capture ( enum Prex  which,
const char *  str 
)

Match a precompiled regex against a string.

Parameters
which   Which regex to return
str     String to apply regex on
Return values
ptr     Pointer to an array of matched captures
NULL    Regex didn't match

Definition at line 308 of file prex.c.

309{
310 if (!str)
311 return NULL;
312
313 struct PrexStorage *h = prex(which);
314#ifdef HAVE_PCRE2
315 size_t len = strlen(str);
316 int rc = pcre2_match(h->re, (PCRE2_SPTR8) str, len, 0, 0, h->mdata, NULL);
317 if (rc < 0)
318 {
319 PCRE2_UCHAR errmsg[1024];
320 pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
321 mutt_debug(LL_DEBUG2, "pcre2_match - <%s> -> <%s> = %s\n", h->str, str, errmsg);
322 return NULL;
323 }
324 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(h->mdata);
325 int i = 0;
326 for (; i < rc; i++)
327 {
328 h->matches[i].rm_so = ovector[i * 2];
329 h->matches[i].rm_eo = ovector[i * 2 + 1];
330 }
331 for (; i < h->nmatches; i++)
332 {
333 h->matches[i].rm_so = -1;
334 h->matches[i].rm_eo = -1;
335 }
336#else
337 if (regexec(h->re, str, h->nmatches, h->matches, 0))
338 return NULL;
339
340 assert((h->re->re_nsub == (h->nmatches - 1)) &&
341 "Regular expression and matches enum are out of sync");
342#endif
343 return h->matches;
344}
#define mutt_debug(LEVEL,...)
Definition: logging.h:84
@ LL_DEBUG2
Log at debug level 2.
Definition: logging.h:41
static struct PrexStorage * prex(enum Prex which)
Compile on demand and get data for a predefined regex.
Definition: prex.c:96
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_prex_free()

void mutt_prex_free ( void  )

Cleanup heap memory allocated by compiled regexes.

Definition at line 349 of file prex.c.

350{
351 for (enum Prex which = 0; which < PREX_MAX; which++)
352 {
353 struct PrexStorage *h = prex(which);
354#ifdef HAVE_PCRE2
355 pcre2_match_data_free(h->mdata);
356 pcre2_code_free(h->re);
357#else
358 regfree(h->re);
359 FREE(&h->re);
360#endif
361 FREE(&h->matches);
362 }
363}
#define FREE(x)
Definition: memory.h:43
Prex
Predefined list of regular expressions.
Definition: prex.h:32
+ Here is the call graph for this function:
+ Here is the caller graph for this function: