NeoMutt  2020-06-26-30-g76c339
Teaching an old dog new tricks
DOXYGEN
url.c File Reference

Parse and identify different URL schemes. More...

#include "config.h"
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "url.h"
#include "mime.h"
+ Include dependency graph for url.c:

Go to the source code of this file.

Functions

static bool parse_query_string (struct UrlQueryList *list, char *src)
 Parse a URL query string. More...
 
static enum UrlScheme get_scheme (const char *src, const regmatch_t *match)
 Extract the scheme part from a matched URL. More...
 
static struct Url * url_new (void)
 Create a Url. More...
 
void url_free (struct Url **ptr)
 Free the contents of a URL. More...
 
void url_pct_encode (char *buf, size_t buflen, const char *src)
 Percent-encode a string. More...
 
int url_pct_decode (char *s)
 Decode a percent-encoded string. More...
 
enum UrlScheme url_check_scheme (const char *str)
 Check the protocol of a URL. More...
 
struct Url * url_parse (const char *src)
 Fill in Url. More...
 
int url_tobuffer (struct Url *url, struct Buffer *buf, int flags)
 Output the URL string for a given Url object. More...
 
int url_tostring (struct Url *url, char *dest, size_t len, int flags)
 Output the URL string for a given Url object. More...
 

Variables

static const struct Mapping UrlMap []
 Constants for URL protocols. More...
 

Detailed Description

Parse and identify different URL schemes.

Authors
  • Thomas Roessler

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file url.c.

Function Documentation

◆ parse_query_string()

static bool parse_query_string ( struct UrlQueryList *  list,
char *  src 
)
static

Parse a URL query string.

Parameters
list: List to store the results
src: String to parse
Return values
true: Success
false: Error

Definition at line 55 of file url.c.

56 {
57  if (!src || (*src == '\0'))
58  return false;
59 
60  bool again = true;
61  while (again)
62  {
63  regmatch_t *match = mutt_prex_capture(PREX_URL_QUERY_KEY_VAL, src);
64  if (!match)
65  return false;
66 
67  regmatch_t *mkey = &match[PREX_URL_QUERY_KEY_VAL_MATCH_KEY];
68  regmatch_t *mval = &match[PREX_URL_QUERY_KEY_VAL_MATCH_VAL];
69 
70  again = src[mutt_regmatch_end(mval)] != '\0';
71 
72  char *key = src + mutt_regmatch_start(mkey);
73  char *val = src + mutt_regmatch_start(mval);
74  src[mutt_regmatch_end(mkey)] = '\0';
75  src[mutt_regmatch_end(mval)] = '\0';
76  if ((url_pct_decode(key) < 0) || (url_pct_decode(val) < 0))
77  return false;
78 
79  struct UrlQuery *qs = mutt_mem_calloc(1, sizeof(struct UrlQuery));
80  qs->name = key;
81  qs->value = val;
82  STAILQ_INSERT_TAIL(list, qs, entries);
83 
84  src += mutt_regmatch_end(mval) + again;
85  }
86 
87  return true;
88 }
char * name
Query name.
Definition: url.h:57
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:70
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:189
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
Parsed Query String.
Definition: url.h:55
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:386
char * value
Query value.
Definition: url.h:58
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:306
https://example.com/?[q=foo]
Definition: prex.h:34
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_scheme()

static enum UrlScheme get_scheme ( const char *  src,
const regmatch_t *  match 
)
static

Extract the scheme part from a matched URL.

Parameters
src: Original string that was matched
match: Result from a matched regex
Return values
enum: Scheme

Definition at line 96 of file url.c.

97 {
98  enum UrlScheme ret = U_UNKNOWN;
99  if (src && match)
100  {
102  if (ret == -1)
103  ret = U_UNKNOWN;
104  }
105  return ret;
106 }
int mutt_map_get_value_n(const char *name, size_t len, const struct Mapping *map)
Lookup the constant for a string.
Definition: mapping.c:62
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
Url wasn't recognised.
Definition: url.h:34
UrlScheme
All recognised Url types.
Definition: url.h:32
[imaps]://...
Definition: prex.h:53
static const struct Mapping UrlMap[]
Constants for URL protocols.
Definition: url.c:40
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_new()

static struct Url* url_new ( void  )
static

Create a Url.

Return values
ptr: New Url

Definition at line 112 of file url.c.

113 {
114  struct Url *url = mutt_mem_calloc(1, sizeof(struct Url));
115  STAILQ_INIT(&url->query_strings);
116  return url;
117 }
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:66
#define STAILQ_INIT(head)
Definition: queue.h:369
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:74
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_free()

void url_free ( struct Url **  ptr)

Free the contents of a URL.

Parameters
ptr: Url to free

Definition at line 123 of file url.c.

124 {
125  if (!ptr || !*ptr)
126  return;
127 
128  struct Url *url = *ptr;
129 
130  struct UrlQueryList *l = &url->query_strings;
131  while (!STAILQ_EMPTY(l))
132  {
133  struct UrlQuery *np = STAILQ_FIRST(l);
134  STAILQ_REMOVE_HEAD(l, entries);
135  // Don't free 'name', 'value': they are pointers into the 'src' string
136  FREE(&np);
137  }
138 
139  FREE(&url->src);
140  FREE(ptr);
141 }
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:66
Parsed Query String.
Definition: url.h:55
#define STAILQ_REMOVE_HEAD(head, field)
Definition: queue.h:419
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:74
#define FREE(x)
Definition: memory.h:40
#define STAILQ_EMPTY(head)
Definition: queue.h:345
#define STAILQ_FIRST(head)
Definition: queue.h:347
char * src
Raw URL string.
Definition: url.h:75
+ Here is the caller graph for this function:

◆ url_pct_encode()

void url_pct_encode ( char *  buf,
size_t  buflen,
const char *  src 
)

Percent-encode a string.

Parameters
buf: Buffer for the result
buflen: Length of buffer
src: String to encode

e.g. turn "hello world" into "hello%20world"

Definition at line 151 of file url.c.

/**
 * url_pct_encode - Percent-encode a string
 * @param buf    Buffer for the result
 * @param buflen Length of buffer
 * @param src    String to encode
 *
 * e.g. turn "hello world" into "hello%20world"
 *
 * Only the characters in " /:&%=" are escaped; every other byte is copied
 * unchanged.  The result is always NUL-terminated.  If it does not fit, it is
 * truncated, but an escape sequence is never split.  A NULL `src` yields an
 * empty string.  If `buf` is NULL or `buflen` is zero, nothing is written.
 */
void url_pct_encode(char *buf, size_t buflen, const char *src)
{
  static const char hex[] = "0123456789ABCDEF";
  static const char reserved[] = " /:&%=";

  /* A zero-length buffer cannot even hold the terminator; without this check
   * the size would wrap around and the copy would be unbounded */
  if (!buf || (buflen == 0))
    return;

  size_t room = buflen - 1; /* one byte is kept back for the NUL */

  for (; src && (*src != '\0') && (room != 0); src++)
  {
    const unsigned char ch = (unsigned char) *src;

    if (strchr(reserved, ch))
    {
      /* Need space for all three bytes of "%XX", or stop here */
      if (room < 3)
        break;

      *buf++ = '%';
      *buf++ = hex[ch >> 4];
      *buf++ = hex[ch & 0xf];
      room -= 3;
    }
    else
    {
      *buf++ = *src;
      room--;
    }
  }

  *buf = '\0';
}
+ Here is the caller graph for this function:

◆ url_pct_decode()

int url_pct_decode ( char *  s)

Decode a percent-encoded string.

Parameters
s: String to decode
Return values
0: Success
-1: Error

e.g. turn "hello%20world" into "hello world". The string is decoded in-place.

Definition at line 189 of file url.c.

/**
 * url_pct_decode - Decode a percent-encoded string
 * @param s String to decode
 * @retval  0 Success
 * @retval -1 Error
 *
 * e.g. turn "hello%20world" into "hello world"
 *
 * The string is decoded in-place.  A '%' that is not followed by two hex
 * digits is an error; in that case the function returns at once and the
 * buffer is left partly rewritten.
 */
int url_pct_decode(char *s)
{
  if (!s)
    return -1;

  char *out = s;      /* write position, never ahead of the read position */
  const char *in = s; /* read position */

  while (*in != '\0')
  {
    if (*in != '%')
    {
      *out++ = *in++;
      continue;
    }

    const bool valid = (in[1] != '\0') && (in[2] != '\0') &&
                       isxdigit((unsigned char) in[1]) &&
                       isxdigit((unsigned char) in[2]) &&
                       (hexval(in[1]) >= 0) && (hexval(in[2]) >= 0);
    if (!valid)
      return -1;

    /* Fold the two hex digits into a single byte */
    *out++ = (hexval(in[1]) << 4) | (hexval(in[2]));
    in += 3;
  }

  *out = '\0';
  return 0;
}
#define hexval(ch)
Definition: mime.h:75
+ Here is the caller graph for this function:

◆ url_check_scheme()

enum UrlScheme url_check_scheme ( const char *  str)

Check the protocol of a URL.

Parameters
str: String to check
Return values
num: Url type, e.g. U_IMAPS

Definition at line 221 of file url.c.

222 {
223  return get_scheme(str, mutt_prex_capture(PREX_URL, str));
224 }
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:306
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:96
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_parse()

struct Url* url_parse ( const char *  src)

Fill in Url.

Parameters
src: String to parse
Return values
ptr: Parsed URL
NULL: Error
Note
Caller must free returned Url with url_free()

Definition at line 234 of file url.c.

235 {
236  const regmatch_t *match = mutt_prex_capture(PREX_URL, src);
237  if (!match)
238  return NULL;
239 
240  enum UrlScheme scheme = get_scheme(src, match);
241  if (scheme == U_UNKNOWN)
242  return NULL;
243 
244  const regmatch_t *userinfo = &match[PREX_URL_MATCH_USERINFO];
245  const regmatch_t *user = &match[PREX_URL_MATCH_USER];
246  const regmatch_t *pass = &match[PREX_URL_MATCH_PASS];
247  const regmatch_t *host = &match[PREX_URL_MATCH_HOSTNAME];
248  const regmatch_t *ipvx = &match[PREX_URL_MATCH_HOSTIPVX];
249  const regmatch_t *port = &match[PREX_URL_MATCH_PORT];
250  const regmatch_t *path = &match[PREX_URL_MATCH_PATH];
251  const regmatch_t *query = &match[PREX_URL_MATCH_QUERY];
252  const regmatch_t *pathonly = &match[PREX_URL_MATCH_PATH_ONLY];
253 
254  struct Url *url = url_new();
255  url->scheme = scheme;
256  url->src = mutt_str_dup(src);
257 
258  /* If the scheme is not followed by two forward slashes, then it's a simple
259  * path (see https://tools.ietf.org/html/rfc3986#section-3). */
260  if (mutt_regmatch_start(pathonly) != -1)
261  {
262  url->src[mutt_regmatch_end(pathonly)] = '\0';
263  url->path = url->src + mutt_regmatch_start(pathonly);
264  if (url_pct_decode(url->path) < 0)
265  goto err;
266  }
267 
268  /* separate userinfo part */
269  if (mutt_regmatch_end(userinfo) != -1)
270  {
271  url->src[mutt_regmatch_end(userinfo) - 1] = '\0';
272  }
273 
274  /* user */
275  if (mutt_regmatch_end(user) != -1)
276  {
277  url->src[mutt_regmatch_end(user)] = '\0';
278  url->user = url->src + mutt_regmatch_start(user);
279  if (url_pct_decode(url->user) < 0)
280  goto err;
281  }
282 
283  /* pass */
284  if (mutt_regmatch_end(pass) != -1)
285  {
286  url->pass = url->src + mutt_regmatch_start(pass);
287  if (url_pct_decode(url->pass) < 0)
288  goto err;
289  }
290 
291  /* host */
292  if (mutt_regmatch_len(host) != 0)
293  {
294  url->host = url->src + mutt_regmatch_start(host);
295  url->src[mutt_regmatch_end(host)] = '\0';
296  }
297  else if (mutt_regmatch_end(ipvx) != -1)
298  {
299  url->host = url->src + mutt_regmatch_start(ipvx) + 1; /* skip opening '[' */
300  url->src[mutt_regmatch_end(ipvx) - 1] = '\0'; /* skip closing ']' */
301  }
302 
303  /* port */
304  if (mutt_regmatch_end(port) != -1)
305  {
306  url->src[mutt_regmatch_end(port)] = '\0';
307  const char *ports = url->src + mutt_regmatch_start(port);
308  int num;
309  if ((mutt_str_atoi(ports, &num) < 0) || (num < 0) || (num > 0xffff))
310  {
311  goto err;
312  }
313  url->port = (unsigned short) num;
314  }
315 
316  /* path */
317  if (mutt_regmatch_end(path) != -1)
318  {
319  url->src[mutt_regmatch_end(path)] = '\0';
320  url->path = url->src + mutt_regmatch_start(path);
321  if (!url->host)
322  {
323  /* If host is not provided, restore the '/': this is an absolute path */
324  *(--url->path) = '/';
325  }
326  if (url_pct_decode(url->path) < 0)
327  goto err;
328  }
329 
330  /* query */
331  if (mutt_regmatch_end(query) != -1)
332  {
333  char *squery = url->src + mutt_regmatch_start(query);
334  if (!parse_query_string(&url->query_strings, squery))
335  goto err;
336  }
337 
338  return url;
339 
340 err:
341  url_free(&url);
342  return NULL;
343 }
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:70
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:189
int mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: string.c:257
char * pass
Password.
Definition: url.h:70
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:66
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:68
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:375
Url wasn't recognised.
Definition: url.h:34
static struct Url * url_new(void)
Create a Url.
Definition: url.c:112
void url_free(struct Url **ptr)
Free the contents of a URL.
Definition: url.c:123
...//user:[pass]@...
Definition: prex.h:60
...:993/[Inbox]
Definition: prex.h:67
imaps://host.com:[993]/...
Definition: prex.h:65
UrlScheme
All recognised Url types.
Definition: url.h:32
static bool parse_query_string(struct UrlQueryList *list, char *src)
Parse a URL query string.
Definition: url.c:55
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:74
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:306
char * user
Username.
Definition: url.h:69
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
...Inbox?[foo=bar&baz=value]
Definition: prex.h:70
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
imaps://...[127.0.0.1]...
Definition: prex.h:63
...//[user]:pass@...
Definition: prex.h:58
char * host
Host.
Definition: url.h:71
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:96
...//[user:pass@]...
Definition: prex.h:57
char * path
Path.
Definition: url.h:73
unsigned short port
Port.
Definition: url.h:72
imaps://...[host.com]...
Definition: prex.h:62
mailto:[me@example.com]?foo=bar
Definition: prex.h:68
char * src
Raw URL string.
Definition: url.h:75
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_tobuffer()

int url_tobuffer ( struct Url *  url,
struct Buffer *  buf,
int  flags 
)

Output the URL string for a given Url object.

Parameters
url: Url to turn into a string
buf: Buffer for the result
flags: Flags, e.g. U_PATH
Return values
0: Success
-1: Error

Definition at line 353 of file url.c.

354 {
355  if (!url || !buf)
356  return -1;
357  if (url->scheme == U_UNKNOWN)
358  return -1;
359 
361 
362  if (url->host)
363  {
364  if (!(flags & U_PATH))
365  mutt_buffer_addstr(buf, "//");
366 
367  if (url->user && (url->user[0] || !(flags & U_PATH)))
368  {
369  char str[256];
370  url_pct_encode(str, sizeof(str), url->user);
371  mutt_buffer_add_printf(buf, "%s@", str);
372  }
373 
374  if (strchr(url->host, ':'))
375  mutt_buffer_add_printf(buf, "[%s]", url->host);
376  else
377  mutt_buffer_add_printf(buf, "%s", url->host);
378 
379  if (url->port)
380  mutt_buffer_add_printf(buf, ":%hu/", url->port);
381  else
382  mutt_buffer_addstr(buf, "/");
383  }
384 
385  if (url->path)
386  mutt_buffer_addstr(buf, url->path);
387 
388  if (STAILQ_FIRST(&url->query_strings))
389  {
390  mutt_buffer_addstr(buf, "?");
391 
392  char str[256];
393  struct UrlQuery *np = NULL;
394  STAILQ_FOREACH(np, &url->query_strings, entries)
395  {
396  url_pct_encode(str, sizeof(str), np->name);
397  mutt_buffer_addstr(buf, str);
398  mutt_buffer_addstr(buf, "=");
399  url_pct_encode(str, sizeof(str), np->value);
400  mutt_buffer_addstr(buf, str);
401  if (STAILQ_NEXT(np, entries))
402  mutt_buffer_addstr(buf, "&");
403  }
404  }
405 
406  return 0;
407 }
char * name
Query name.
Definition: url.h:57
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:68
Parsed Query String.
Definition: url.h:55
Url wasn't recognised.
Definition: url.h:34
char * value
Query value.
Definition: url.h:58
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
const char * mutt_map_get_name(int val, const struct Mapping *map)
Lookup a string for a constant.
Definition: mapping.c:42
int mutt_buffer_add_printf(struct Buffer *buf, const char *fmt,...)
Format a string appending a Buffer.
Definition: buffer.c:203
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
void url_pct_encode(char *buf, size_t buflen, const char *src)
Percent-encode a string.
Definition: url.c:151
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:74
char * user
Username.
Definition: url.h:69
char * host
Host.
Definition: url.h:71
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:349
#define STAILQ_NEXT(elm, field)
Definition: queue.h:397
char * path
Path.
Definition: url.h:73
unsigned short port
Port.
Definition: url.h:72
static const struct Mapping UrlMap[]
Constants for URL protocols.
Definition: url.c:40
#define STAILQ_FIRST(head)
Definition: queue.h:347
#define U_PATH
Definition: url.h:48
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_tostring()

int url_tostring ( struct Url *  url,
char *  dest,
size_t  len,
int  flags 
)

Output the URL string for a given Url object.

Parameters
url: Url to turn into a string
dest: Buffer for the result
len: Length of buffer
flags: Flags, e.g. U_PATH
Return values
0: Success
-1: Error

Definition at line 418 of file url.c.

/**
 * url_tostring - Output the URL string for a given Url object
 * @param url   Url to turn into a string
 * @param dest  Buffer for the result
 * @param len   Length of buffer
 * @param flags Flags, e.g. #U_PATH
 * @retval  0 Success
 * @retval -1 Error
 *
 * Builds the string in a pooled Buffer with url_tobuffer() and copies it to
 * `dest` only on success; on failure `dest` is not touched.
 */
int url_tostring(struct Url *url, char *dest, size_t len, int flags)
{
  if (!url || !dest)
    return -1;

  struct Buffer *scratch = mutt_buffer_pool_get();

  const int rc = url_tobuffer(url, scratch, flags);
  if (rc == 0)
    mutt_str_copy(dest, mutt_b2s(scratch), len);

  /* The pooled Buffer is handed back on both paths */
  mutt_buffer_pool_release(&scratch);

  return rc;
}
struct Buffer * mutt_buffer_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:101
void mutt_buffer_pool_release(struct Buffer **pbuf)
Free a Buffer from the pool.
Definition: pool.c:112
String manipulation buffer.
Definition: buffer.h:33
#define mutt_b2s(buf)
Definition: buffer.h:41
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:724
int url_tobuffer(struct Url *url, struct Buffer *buf, int flags)
Output the URL string for a given Url object.
Definition: url.c:353
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ UrlMap

const struct Mapping UrlMap[]
static
Initial value:
= {
{ "file", U_FILE }, { "imap", U_IMAP }, { "imaps", U_IMAPS },
{ "pop", U_POP }, { "pops", U_POPS }, { "news", U_NNTP },
{ "nntp", U_NNTP }, { "snews", U_NNTPS }, { "nntps", U_NNTPS },
{ "mailto", U_MAILTO }, { "notmuch", U_NOTMUCH }, { "smtp", U_SMTP },
{ "smtps", U_SMTPS }, { NULL, U_UNKNOWN },
}
Url is notmuch://.
Definition: url.h:45
Url is imaps://.
Definition: url.h:39
Url wasn't recognised.
Definition: url.h:34
Url is imap://.
Definition: url.h:38
Url is nntps://.
Definition: url.h:41
Url is smtps://.
Definition: url.h:43
Url is pop://.
Definition: url.h:36
Url is nntp://.
Definition: url.h:40
Url is smtp://.
Definition: url.h:42
Url is mailto://.
Definition: url.h:44
Url is file://.
Definition: url.h:35
Url is pops://.
Definition: url.h:37

Constants for URL protocols.

Definition at line 40 of file url.c.