NeoMutt  2020-08-07-1-gab41a1
Teaching an old dog new tricks
DOXYGEN
prex.c File Reference

Manage precompiled / predefined regular expressions. More...

#include "config.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "prex.h"
#include "logging.h"
#include "memory.h"
+ Include dependency graph for prex.c:

Go to the source code of this file.

Data Structures

struct  PrexStorage
 A predefined / precompiled regex. More...
 

Macros

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
 
#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
 
#define PREX_DOW_NOCASE
 
#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"
 
#define PREX_YEAR   "([[:digit:]]{4})"
 
#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="
 
#define PATH   ":@/ "
 
#define QUERY_PART   "^&="
 
#define FWS   " *"
 
#define C   "(\\(.*\\))?"
 
#define CFWS   FWS C FWS
 

Functions

static struct PrexStorage * prex (enum Prex which)
 Compile on demand and get data for a predefined regex. More...
 
regmatch_t * mutt_prex_capture (enum Prex which, const char *str)
 Match a precompiled regex against a string. More...
 
void mutt_prex_free (void)
 Cleanup heap memory allocated by compiled regexes. More...
 

Detailed Description

Manage precompiled / predefined regular expressions.

Authors
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file prex.c.

Macro Definition Documentation

◆ PREX_MONTH

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"

Definition at line 81 of file prex.c.

◆ PREX_DOW

#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"

Definition at line 82 of file prex.c.

◆ PREX_DOW_NOCASE

#define PREX_DOW_NOCASE
Value:
"([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|" \
"[Ss][Aa][Tt]|[Ss][Uu][Nn])"

Definition at line 83 of file prex.c.

◆ PREX_TIME

#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"

Definition at line 86 of file prex.c.

◆ PREX_YEAR

#define PREX_YEAR   "([[:digit:]]{4})"

Definition at line 87 of file prex.c.

◆ UNR_PCTENC_SUBDEL

#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="

◆ PATH

#define PATH   ":@/ "

◆ QUERY_PART

#define QUERY_PART   "^&="

◆ FWS

#define FWS   " *"

◆ C

#define C   "(\\(.*\\))?"

◆ CFWS

#define CFWS   FWS C FWS

Function Documentation

◆ prex()

static struct PrexStorage* prex ( enum Prex  which)
static

Compile on demand and get data for a predefined regex.

Parameters
which: Which regex to get
Return values
ptr: Pointer to a PrexStorage struct
Note
Returned pointer is guaranteed not to be NULL. The function asserts on error.

Definition at line 97 of file prex.c.

/*
 * prex() body: look up the PrexStorage entry for `which` and compile its
 * regex the first time it is requested.
 *
 * - `storage` is a function-local static table indexed directly by `which`;
 *   the two asserts after the table check that `which` is in range and that
 *   the table order matches the enum.
 * - Compilation happens only while h->re is NULL, so each entry is compiled
 *   at most once until its h->re is reset.
 * - With HAVE_PCRE2 the pattern is compiled by pcre2_compile() (PCRE2_UTF is
 *   requested when pcre2_has_unicode() says so); otherwise POSIX regcomp()
 *   with REG_EXTENDED is used.
 * - h->matches is allocated here with h->nmatches slots.
 *
 * NOTE(review): this is a rendered listing; its numbering skips some source
 * lines inside the initializers (e.g. 103, 140-141), so the fields on those
 * lines are not visible here.
 */
98 {
99  static struct PrexStorage storage[] = {
100  /* clang-format off */
101  {
102  PREX_URL,
104  /* Spec: https://tools.ietf.org/html/rfc3986#section-3 */
105 #ifdef HAVE_PCRE2
106 #define UNR_PCTENC_SUBDEL "][\\p{L}\\p{N}._~%!$&'()*+,;="
107 #else
108 #define UNR_PCTENC_SUBDEL "][[:alnum:]._~%!$&'()*+,;="
109 #endif
110 #define PATH ":@/ "
111  "^([[:alpha:]][-+.[:alnum:]]+):" // . scheme
112  "(" // . rest
113  "(" // . . authority + path
114  // . . or path only
115  "(//" // . . . authority + path
116  "(" // . . . . user info
117  "([" UNR_PCTENC_SUBDEL "@-]*)" // . . . . . user name + '@'
118  "(:([" UNR_PCTENC_SUBDEL "-]*))?" // . . . . . password
119  "@)?"
120  "(" // . . . . host
121  "([" UNR_PCTENC_SUBDEL "-]*)" // . . . . . host name
122  "|"
123  "(\\[[[:xdigit:]:.]+\\])" // . . . . . IPv4 or IPv6
124  ")"
125  "(:([[:digit:]]+))?" // . . . . port
126  "(/([" UNR_PCTENC_SUBDEL PATH "-]*))?" // . . . . path
127  ")"
128  "|"
129  "(" // . . . path only
130  "[" UNR_PCTENC_SUBDEL PATH "-]*" // . . . . path
131  ")"
132  ")"
133  // Should be: "(\\?([" UNR_PCTENC_SUBDEL PATH "?-]*))?"
134  "(\\?([^#]*))?" // . . query
135  ")$"
136 #undef PATH
137 #undef UNR_PCTENC_SUBDEL
138  },
139  {
142 #define QUERY_PART "^&=" // Should be: "-[:alnum:]._~%!$'()*+,;:@/"
143  "([" QUERY_PART "]+)=([" QUERY_PART "]+)" // query + ' '
144 #undef QUERY_PART
145  },
146  {
149  "=\\?"
150  "([^][()<>@,;:\\\"/?. =]+)" // charset
151  "\\?"
152  "([qQbB])" // encoding
153  "\\?"
154  "([^?]+)" // encoded text - we accept whitespace, see #1189
155  "\\?="
156  },
157  {
160  "^\\#H ([[:alnum:]_\\.-]+) ([[:alnum:]]{4}( [[:alnum:]]{4}){7})[ \t]*$"
161  },
162  {
165  /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
166  "^"
167  "(" PREX_DOW ", )?" // Day of week
168  " *"
169  "([[:digit:]]{1,2}) " // Day
170  PREX_MONTH // Month
171  " ([[:digit:]]{2,4}) " // Year
172  "([[:digit:]]{2})" // Hour
173  ":([[:digit:]]{2})" // Minute
174  "(:([[:digit:]]{2}))?" // Second
175  " *"
176  "("
177  "([+-][[:digit:]]{4})|" // TZ
178  "([[:alpha:]]+)" // Obsolete TZ
179  ")"
180  },
181  {
184  /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
185 #define FWS " *"
186 #define C "(\\(.*\\))?"
187 #define CFWS FWS C FWS
188  "^"
189  CFWS
190  "(([[:alpha:]]+)" CFWS ", *)?" // Day of week (or whatever)
191  CFWS "([[:digit:]]{1,2}) " // Day
192  CFWS PREX_MONTH // Month
193  CFWS "([[:digit:]]{2,4}) " // Year
194  CFWS "([[:digit:]]{1,2})" // Hour
195  ":" CFWS "([[:digit:]]{1,2})" // Minute
196  CFWS
197  "(:" CFWS "([[:digit:]]{1,2}))?" // Second
198  CFWS
199  "("
200  "([+-][[:digit:]]{4})|" // TZ
201  "([[:alpha:]]+)" // Obsolete TZ
202  ")?"
203 #undef CFWS
204 #undef C
205 #undef FWS
206  },
207  {
210  "( ([[:digit:]])|([[:digit:]]{2}))" // Day
211  "-" PREX_MONTH // Month
212  "-" PREX_YEAR // Year
213  " " PREX_TIME // Time
214  " ([+-][[:digit:]]{4})" // TZ
215  },
216  {
219  /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
220  "^From " // From
221  "([^[:space:]]+) +" // Sender
222  PREX_DOW // Day of week
223  " +"
224  PREX_MONTH // Month
225  " ( ([[:digit:]])|([[:digit:]]{2}))" // Day
226  " +"
227  PREX_TIME // Time
228  " +"
229  PREX_YEAR // Year
230  },
231  {
234  /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
235  "^From " // From
236  "("
237  "[^[:space:]]+" // Sender
238  "( at [^[:space:]]+)?" // Possibly obfuscated, pipermail-style
239  ")?"
240  " *"
241  PREX_DOW_NOCASE // Day of week
242  " +"
243  PREX_MONTH // Month
244  " +"
245  "( " // Day
246  "([[:digit:]])|"
247  "([[:digit:]]{2})"
248  ")"
249  " +"
250  "("
251  PREX_TIME // Time (HH:MM:SS)
252  "|"
253  "([[:digit:]]{2}" // Time (HH:MM)
254  ":[[:digit:]]{2})"
255  ")"
256  " +"
257  "([[:alpha:] ]*)" // Timezone (which we skip)
258  "("
259  PREX_YEAR // Year (YYYY)
260  "|"
261  "([[:digit:]]{2})" // Year (YY)
262  ")"
263  }
264  /* clang-format on */
265  };
266 
267  assert((which >= 0) && (which < PREX_MAX) && "Invalid 'which' argument");
268  struct PrexStorage *h = &storage[which];
269  assert((which == h->which) && "Fix 'storage' array");
270  if (!h->re)
271  {
272 #ifdef HAVE_PCRE2
273  uint32_t opt = pcre2_has_unicode() ? PCRE2_UTF : 0;
274  int eno;
275  PCRE2_SIZE eoff;
276  h->re = pcre2_compile((PCRE2_SPTR8) h->str, PCRE2_ZERO_TERMINATED, opt,
277  &eno, &eoff, NULL);
278  if (!h->re)
279  {
/* A bare string literal is a non-NULL pointer, so assert("...") can never
 * fire; `false && "..."` makes the assertion actually trip while keeping the
 * message in the diagnostic. */
280  assert(false && "Fix your RE");
281  }
282  h->mdata = pcre2_match_data_create_from_pattern(h->re, NULL);
283  uint32_t ccount;
284  pcre2_pattern_info(h->re, PCRE2_INFO_CAPTURECOUNT, &ccount);
/* nmatches must cover the whole match (slot 0) plus every capture group. */
285  assert(ccount + 1 == h->nmatches && "Number of matches do not match (...)");
286  h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
287 #else
288  h->re = mutt_mem_calloc(1, sizeof(*h->re));
289  if (regcomp(h->re, h->str, REG_EXTENDED) != 0)
290  {
/* Same fix as in the PCRE2 branch: the original assert("...") was always
 * true and silently accepted a pattern that failed to compile. */
291  assert(false && "Fix your RE");
292  }
293  h->matches = mutt_mem_calloc(h->nmatches, sizeof(*h->matches));
294 #endif
295  }
296  return h;
297 }
regex_t * re
Compiled regex.
Definition: prex.c:76
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define QUERY_PART
Definition: prex.h:42
[16-MAR-2020 15:09:35 -0700]
Definition: prex.h:39
size_t nmatches
Number of regex matches.
Definition: prex.c:70
regmatch_t * matches
Resulting matches.
Definition: prex.c:78
A predefined / precompiled regex.
Definition: prex.c:67
#define UNR_PCTENC_SUBDEL
enum Prex which
Regex type, e.g. PREX_URL.
Definition: prex.c:69
#define PREX_DOW
Definition: prex.c:82
[Mon, (Comment) 16 Mar 2020 15:09:35 -0700]
Definition: prex.h:38
https://example.com/?[q=foo]
Definition: prex.h:34
#define PREX_TIME
Definition: prex.c:86
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:40
#define CFWS
[Mon, 16 Mar 2020 15:09:35 -0700]
Definition: prex.h:37
[#H foo.com A76D 954B EB79 1F49 5B3A 0A0E 0681 65B1]
Definition: prex.h:36
#define PATH
#define PREX_DOW_NOCASE
Definition: prex.c:83
#define PREX_YEAR
Definition: prex.c:87
#define PREX_MONTH
Definition: prex.c:81
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition: prex.h:41
const char * str
Regex string.
Definition: prex.c:71
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_prex_capture()

regmatch_t* mutt_prex_capture ( enum Prex  which,
const char *  str 
)

Match a precompiled regex against a string.

Parameters
which: Which regex to return
str: String to apply regex on
Return values
ptr: Pointer to an array of matched captures
NULL: Regex didn't match

Definition at line 306 of file prex.c.

/*
 * mutt_prex_capture() body: run the predefined regex `which` against `str`.
 *
 * Returns NULL when `str` is NULL or when the match call reports failure.
 * On success it returns h->matches, the capture buffer stored in the
 * PrexStorage entry for `which`; that same buffer is written again by the
 * next call for the same `which`, so callers must consume it before then.
 */
307 {
308  if (!str)
309  return NULL;
310 
/* prex() compiles the regex on first use and hands back its storage. */
311  struct PrexStorage *h = prex(which);
312 #ifdef HAVE_PCRE2
313  size_t len = strlen(str);
314  int rc = pcre2_match(h->re, (PCRE2_SPTR8) str, len, 0, 0, h->mdata, NULL);
/* Any negative result is logged at debug level 2 and reported to the caller
 * as "no match". */
315  if (rc < 0)
316  {
317  PCRE2_UCHAR errmsg[1024];
318  pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
319  mutt_debug(LL_DEBUG2, "pcre2_match - <%s> -> <%s> = %s\n", h->str, str, errmsg);
320  return NULL;
321  }
/* Copy the first `rc` offset pairs from the PCRE2 ovector into the
 * regmatch_t array, so both build flavours return the same type. */
322  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(h->mdata);
323  int i = 0;
324  for (; i < rc; i++)
325  {
326  h->matches[i].rm_so = ovector[i * 2];
327  h->matches[i].rm_eo = ovector[i * 2 + 1];
328  }
/* Remaining slots (beyond what pcre2_match() reported) are marked as unset
 * with -1 offsets. */
329  for (; i < h->nmatches; i++)
330  {
331  h->matches[i].rm_so = -1;
332  h->matches[i].rm_eo = -1;
333  }
334 #else
/* regexec() returns non-zero when the string does not match. */
335  if (regexec(h->re, str, h->nmatches, h->matches, 0))
336  return NULL;
337 
/* Sanity check: the pattern's sub-expression count must equal the number of
 * capture slots minus the whole-match slot. */
338  assert((h->re->re_nsub == (h->nmatches - 1)) &&
339  "Regular expression and matches enum are out of sync");
340 #endif
341  return h->matches;
342 }
regex_t * re
Compiled regex.
Definition: prex.c:76
size_t nmatches
Number of regex matches.
Definition: prex.c:70
regmatch_t * matches
Resulting matches.
Definition: prex.c:78
A predefined / precompiled regex.
Definition: prex.c:67
Log at debug level 2.
Definition: logging.h:41
enum Prex which
Regex type, e.g. PREX_URL.
Definition: prex.c:69
static struct PrexStorage * prex(enum Prex which)
Compile on demand and get data for a predefined regex.
Definition: prex.c:97
#define mutt_debug(LEVEL,...)
Definition: logging.h:81
const char * str
Regex string.
Definition: prex.c:71
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_prex_free()

void mutt_prex_free ( void  )

Cleanup heap memory allocated by compiled regexes.

Definition at line 347 of file prex.c.

/*
 * mutt_prex_free() body: release the compiled regex and the capture buffer
 * of every predefined regex.
 *
 * Each entry is fetched through prex(), which compiles on demand, so an entry
 * that was never used is compiled here and then immediately freed.
 *
 * prex() treats a NULL h->re as "not compiled yet". The PCRE2 branch
 * therefore resets h->re and h->mdata after freeing them; otherwise a later
 * prex() call, or a second mutt_prex_free(), would reuse or re-free the
 * released objects.
 */
348 {
349  for (enum Prex which = 0; which < PREX_MAX; which++)
350  {
351  struct PrexStorage *h = prex(which);
352 #ifdef HAVE_PCRE2
353  pcre2_match_data_free(h->mdata);
 h->mdata = NULL; /* do not leave a dangling match-data pointer */
354  pcre2_code_free(h->re);
 h->re = NULL; /* lets prex() recompile instead of using freed code */
355 #else
356  regfree(h->re);
/* FREE() is given the pointer's address, presumably so it can reset it to
 * NULL - confirm against memory.h. */
357  FREE(&h->re);
358 #endif
359  FREE(&h->matches);
360  }
361 }
regex_t * re
Compiled regex.
Definition: prex.c:76
Definition: prex.h:42
Prex
Predefined list of regular expressions.
Definition: prex.h:31
regmatch_t * matches
Resulting matches.
Definition: prex.c:78
A predefined / precompiled regex.
Definition: prex.c:67
enum Prex which
Regex type, e.g. PREX_URL.
Definition: prex.c:69
static struct PrexStorage * prex(enum Prex which)
Compile on demand and get data for a predefined regex.
Definition: prex.c:97
#define FREE(x)
Definition: memory.h:40
+ Here is the call graph for this function:
+ Here is the caller graph for this function: