NeoMutt  2020-03-20-65-g141838
Teaching an old dog new tricks
DOXYGEN
url.c
Go to the documentation of this file.
1 
29 #include "config.h"
30 #include <ctype.h>
31 #include <string.h>
32 #include "mutt/lib.h"
33 #include "url.h"
34 #include "mime.h"
35 
39 static const struct Mapping UrlMap[] = {
40  { "file", U_FILE }, { "imap", U_IMAP }, { "imaps", U_IMAPS },
41  { "pop", U_POP }, { "pops", U_POPS }, { "news", U_NNTP },
42  { "snews", U_NNTPS }, { "mailto", U_MAILTO }, { "notmuch", U_NOTMUCH },
43  { "smtp", U_SMTP }, { "smtps", U_SMTPS }, { NULL, U_UNKNOWN },
44 };
45 
53 static bool parse_query_string(struct UrlQueryList *list, char *src)
54 {
55  if (!src || !*src)
56  return false;
57 
58  bool again = true;
59  while (again)
60  {
61  regmatch_t *match = mutt_prex_capture(PREX_URL_QUERY_KEY_VAL, src);
62  if (!match)
63  return false;
64 
65  regmatch_t *mkey = &match[PREX_URL_QUERY_KEY_VAL_MATCH_KEY];
66  regmatch_t *mval = &match[PREX_URL_QUERY_KEY_VAL_MATCH_VAL];
67 
68  again = src[mutt_regmatch_end(mval)] != '\0';
69 
70  char *key = src + mutt_regmatch_start(mkey);
71  char *val = src + mutt_regmatch_start(mval);
72  src[mutt_regmatch_end(mkey)] = '\0';
73  src[mutt_regmatch_end(mval)] = '\0';
74  if ((url_pct_decode(key) < 0) || (url_pct_decode(val) < 0))
75  return false;
76 
77  struct UrlQuery *qs = mutt_mem_calloc(1, sizeof(struct UrlQuery));
78  qs->name = key;
79  qs->value = val;
80  STAILQ_INSERT_TAIL(list, qs, entries);
81 
82  src += mutt_regmatch_end(mval) + again;
83  }
84 
85  return true;
86 }
87 
94 static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
95 {
96  enum UrlScheme ret = U_UNKNOWN;
97  if (src && match)
98  {
100  if (ret == -1)
101  ret = U_UNKNOWN;
102  }
103  return ret;
104 }
105 
110 static struct Url *url_new(void)
111 {
112  struct Url *url = mutt_mem_calloc(1, sizeof(struct Url));
113  STAILQ_INIT(&url->query_strings);
114  return url;
115 }
116 
121 void url_free(struct Url **ptr)
122 {
123  if (!ptr || !*ptr)
124  return;
125 
126  struct Url *url = *ptr;
127 
128  struct UrlQueryList *l = &url->query_strings;
129  while (!STAILQ_EMPTY(l))
130  {
131  struct UrlQuery *np = STAILQ_FIRST(l);
132  STAILQ_REMOVE_HEAD(l, entries);
133  // Don't free 'name', 'value': they are pointers into the 'src' string
134  FREE(&np);
135  }
136 
137  FREE(&url->src);
138  FREE(ptr);
139 }
140 
/**
 * url_pct_encode - Percent-encode a string
 * @param buf    Buffer for the result
 * @param buflen Length of buffer, including space for the terminating NUL
 * @param src    String to encode, may be NULL
 *
 * The characters space, '/', ':', '&', '%' and '=' are written as "%XX" using
 * upper-case hex digits; every other character is copied unchanged.
 *
 * The result is always NUL-terminated and is truncated if it doesn't fit.  An
 * escape sequence is never split: if fewer than three bytes remain, encoding
 * stops before it.  If buf is NULL or buflen is 0, nothing is written.
 */
void url_pct_encode(char *buf, size_t buflen, const char *src)
{
  static const char hex[] = "0123456789ABCDEF";

  /* A zero-length buffer can't even hold the terminator.  Without this check
   * the size would wrap around below and the copy would be unbounded. */
  if (!buf || (buflen == 0))
    return;

  size_t room = buflen - 1; /* keep one byte back for the terminating NUL */
  char *out = buf;

  for (; src && (*src != '\0') && (room != 0); src++)
  {
    const unsigned char ch = (unsigned char) *src;

    if (strchr(" /:&%=", ch))
    {
      if (room < 3)
        break;

      *out++ = '%';
      *out++ = hex[ch >> 4];
      *out++ = hex[ch & 0xf];
      room -= 3;
    }
    else
    {
      *out++ = (char) ch;
      room--;
    }
  }

  *out = '\0';
}
177 
/**
 * url_pct_decode - Decode a percent-encoded string
 * @param s String to decode, in place
 * @retval  0 Success
 * @retval -1 s is NULL, or it contains a '%' that isn't followed by two
 *            hex digits
 *
 * Each "%XX" sequence is replaced by the single byte it encodes.  The result
 * is never longer than the input.  On failure the string may already have
 * been partly rewritten.
 */
int url_pct_decode(char *s)
{
  if (!s)
    return -1;

  const char *in = s; /* read position */
  char *out = s;      /* write position, never ahead of 'in' */

  while (*in != '\0')
  {
    if (*in != '%')
    {
      *out++ = *in++;
      continue;
    }

    /* A '%' must be followed by exactly two hex digits */
    if ((in[1] == '\0') || (in[2] == '\0') || !isxdigit((unsigned char) in[1]) ||
        !isxdigit((unsigned char) in[2]) || (hexval(in[1]) < 0) || (hexval(in[2]) < 0))
    {
      return -1;
    }

    *out++ = (hexval(in[1]) << 4) | (hexval(in[2]));
    in += 3;
  }

  *out = '\0';
  return 0;
}
213 
219 enum UrlScheme url_check_scheme(const char *str)
220 {
221  return get_scheme(str, mutt_prex_capture(PREX_URL, str));
222 }
223 
232 struct Url *url_parse(const char *src)
233 {
234  const regmatch_t *match = mutt_prex_capture(PREX_URL, src);
235  if (!match)
236  return NULL;
237 
238  enum UrlScheme scheme = get_scheme(src, match);
239  if (scheme == U_UNKNOWN)
240  return NULL;
241 
242  const regmatch_t *userinfo = &match[PREX_URL_MATCH_USERINFO];
243  const regmatch_t *user = &match[PREX_URL_MATCH_USER];
244  const regmatch_t *pass = &match[PREX_URL_MATCH_PASS];
245  const regmatch_t *host = &match[PREX_URL_MATCH_HOSTNAME];
246  const regmatch_t *ipvx = &match[PREX_URL_MATCH_HOSTIPVX];
247  const regmatch_t *port = &match[PREX_URL_MATCH_PORT];
248  const regmatch_t *path = &match[PREX_URL_MATCH_PATH];
249  const regmatch_t *query = &match[PREX_URL_MATCH_QUERY];
250  const regmatch_t *pathonly = &match[PREX_URL_MATCH_PATH_ONLY];
251 
252  struct Url *url = url_new();
253  url->scheme = scheme;
254  url->src = mutt_str_strdup(src);
255 
256  /* If the scheme is not followed by two forward slashes, then it's a simple
257  * path (see https://tools.ietf.org/html/rfc3986#section-3). */
258  if (mutt_regmatch_start(pathonly) != -1)
259  {
260  url->src[mutt_regmatch_end(pathonly)] = '\0';
261  url->path = url->src + mutt_regmatch_start(pathonly);
262  if (url_pct_decode(url->path) < 0)
263  goto err;
264  }
265 
266  /* separate userinfo part */
267  if (mutt_regmatch_end(userinfo) != -1)
268  {
269  url->src[mutt_regmatch_end(userinfo) - 1] = '\0';
270  }
271 
272  /* user */
273  if (mutt_regmatch_end(user) != -1)
274  {
275  url->src[mutt_regmatch_end(user)] = '\0';
276  url->user = url->src + mutt_regmatch_start(user);
277  if (url_pct_decode(url->user) < 0)
278  goto err;
279  }
280 
281  /* pass */
282  if (mutt_regmatch_end(pass) != -1)
283  {
284  url->pass = url->src + mutt_regmatch_start(pass);
285  if (url_pct_decode(url->pass) < 0)
286  goto err;
287  }
288 
289  /* host */
290  if (mutt_regmatch_end(host) != -1)
291  {
292  url->host = url->src + mutt_regmatch_start(host);
293  url->src[mutt_regmatch_end(host)] = '\0';
294  }
295  else if (mutt_regmatch_end(ipvx) != -1)
296  {
297  url->host = url->src + mutt_regmatch_start(ipvx) + 1; /* skip opening '[' */
298  url->src[mutt_regmatch_end(ipvx) - 1] = '\0'; /* skip closing ']' */
299  }
300 
301  /* port */
302  if (mutt_regmatch_end(port) != -1)
303  {
304  url->src[mutt_regmatch_end(port)] = '\0';
305  const char *ports = url->src + mutt_regmatch_start(port);
306  int num;
307  if ((mutt_str_atoi(ports, &num) < 0) || (num < 0) || (num > 0xffff))
308  {
309  goto err;
310  }
311  url->port = (unsigned short) num;
312  }
313 
314  /* path */
315  if (mutt_regmatch_end(path) != -1)
316  {
317  url->src[mutt_regmatch_end(path)] = '\0';
318  url->path = url->src + mutt_regmatch_start(path);
319  if (!url->host)
320  {
321  /* If host is not provided, restore the '/': this is an absolute path */
322  *(--url->path) = '/';
323  }
324  if (url_pct_decode(url->path) < 0)
325  goto err;
326  }
327 
328  /* query */
329  if (mutt_regmatch_end(query) != -1)
330  {
331  char *squery = url->src + mutt_regmatch_start(query);
332  if (!parse_query_string(&url->query_strings, squery))
333  goto err;
334  }
335 
336  return url;
337 
338 err:
339  url_free(&url);
340  return NULL;
341 }
342 
351 int url_tobuffer(struct Url *url, struct Buffer *buf, int flags)
352 {
353  if (!url || !buf)
354  return -1;
355  if (url->scheme == U_UNKNOWN)
356  return -1;
357 
358  mutt_buffer_printf(buf, "%s:", mutt_map_get_name(url->scheme, UrlMap));
359 
360  if (url->host)
361  {
362  if (!(flags & U_PATH))
363  mutt_buffer_addstr(buf, "//");
364 
365  if (url->user && (url->user[0] || !(flags & U_PATH)))
366  {
367  char str[256];
368  url_pct_encode(str, sizeof(str), url->user);
369  mutt_buffer_add_printf(buf, "%s@", str);
370  }
371 
372  if (strchr(url->host, ':'))
373  mutt_buffer_add_printf(buf, "[%s]", url->host);
374  else
375  mutt_buffer_add_printf(buf, "%s", url->host);
376 
377  if (url->port)
378  mutt_buffer_add_printf(buf, ":%hu/", url->port);
379  else
380  mutt_buffer_addstr(buf, "/");
381  }
382 
383  if (url->path)
384  mutt_buffer_addstr(buf, url->path);
385 
386  if (STAILQ_FIRST(&url->query_strings))
387  {
388  mutt_buffer_addstr(buf, "?");
389 
390  char str[256];
391  struct UrlQuery *np = NULL;
392  STAILQ_FOREACH(np, &url->query_strings, entries)
393  {
394  url_pct_encode(str, sizeof(str), np->name);
395  mutt_buffer_addstr(buf, str);
396  mutt_buffer_addstr(buf, "=");
397  url_pct_encode(str, sizeof(str), np->value);
398  mutt_buffer_addstr(buf, str);
399  if (STAILQ_NEXT(np, entries))
400  mutt_buffer_addstr(buf, "&");
401  }
402  }
403 
404  return 0;
405 }
406 
/**
 * url_tostring - Output the URL string for a given Url object
 * @param url   Url to turn into a string
 * @param dest  Buffer for the result
 * @param len   Length of dest
 * @param flags Flags, passed unchanged to url_tobuffer()
 * @retval  0 Success
 * @retval -1 url or dest is NULL, or url_tobuffer() failed
 *
 * The string is built in a Buffer borrowed from the pool and then copied to
 * dest.  dest is only written to on success.
 */
int url_tostring(struct Url *url, char *dest, size_t len, int flags)
{
  if (!url || !dest)
    return -1;

  struct Buffer *tmp = mutt_buffer_pool_get();

  const int rc = url_tobuffer(url, tmp, flags);
  if (rc == 0)
    mutt_str_strfcpy(dest, mutt_b2s(tmp), len);

  mutt_buffer_pool_release(&tmp);
  return rc;
}
enum UrlScheme url_check_scheme(const char *str)
Check the protocol of a URL.
Definition: url.c:219
char * name
Query name.
Definition: url.h:57
int mutt_map_get_value_n(const char *name, size_t len, const struct Mapping *map)
Lookup the constant for a string.
Definition: mapping.c:62
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:69
Url is notmuch://.
Definition: url.h:45
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:187
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
int mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition: string.c:262
char * pass
Password.
Definition: url.h:70
struct Buffer * mutt_buffer_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:101
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:66
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:68
void mutt_buffer_pool_release(struct Buffer **pbuf)
Free a Buffer from the pool.
Definition: pool.c:112
Url is imaps://.
Definition: url.h:39
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:79
String manipulation buffer.
Definition: buffer.h:33
Parsed Query String.
Definition: url.h:55
Url wasn't recognised.
Definition: url.h:34
static struct Url * url_new(void)
Create a Url.
Definition: url.c:110
Url is imap://.
Definition: url.h:38
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:386
#define STAILQ_REMOVE_HEAD(head, field)
Definition: queue.h:419
char * value
Query value.
Definition: url.h:58
void url_free(struct Url **ptr)
Free the contents of a URL.
Definition: url.c:121
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:160
...//user:[pass]@...
Definition: prex.h:55
...:993/[Inbox]
Definition: prex.h:62
const char * mutt_map_get_name(int val, const struct Mapping *map)
Lookup a string for a constant.
Definition: mapping.c:42
imaps://host.com:[993]/...
Definition: prex.h:60
#define STAILQ_INIT(head)
Definition: queue.h:369
UrlScheme
All recognised Url types.
Definition: url.h:32
int mutt_buffer_add_printf(struct Buffer *buf, const char *fmt,...)
Format a string appending a Buffer.
Definition: buffer.c:203
Url is nntps://.
Definition: url.h:41
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
Constants and macros for managing MIME encoding.
#define mutt_b2s(buf)
Definition: buffer.h:41
void url_pct_encode(char *buf, size_t buflen, const char *src)
Percent-encode a string.
Definition: url.c:149
static bool parse_query_string(struct UrlQueryList *list, char *src)
Parse a URL query string.
Definition: url.c:53
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:74
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:145
char * user
Username.
Definition: url.h:69
https://example.com/?[q=foo]
Definition: prex.h:34
Url is smtps://.
Definition: url.h:43
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:59
...Inbox?[foo=bar&baz=value]
Definition: prex.h:65
size_t mutt_str_strfcpy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:773
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
imaps://...[127.0.0.1]...
Definition: prex.h:58
...//[user]:pass@...
Definition: prex.h:53
char * host
Host.
Definition: url.h:71
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:94
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:349
#define hexval(ch)
Definition: mime.h:75
...//[user:pass@]...
Definition: prex.h:52
#define STAILQ_NEXT(elm, field)
Definition: queue.h:397
char * path
Path.
Definition: url.h:73
Url is pop://.
Definition: url.h:36
unsigned short port
Port.
Definition: url.h:72
char * mutt_str_strdup(const char *str)
Copy a string, safely.
Definition: string.c:380
Url is nntp://.
Definition: url.h:40
#define FREE(x)
Definition: memory.h:40
[imaps]://...
Definition: prex.h:48
int url_tostring(struct Url *url, char *dest, size_t len, int flags)
Output the URL string for a given Url object.
Definition: url.c:416
Url is smtp://.
Definition: url.h:42
Mapping between user-readable string and a constant.
Definition: mapping.h:29
Url is mailto://.
Definition: url.h:44
#define STAILQ_EMPTY(head)
Definition: queue.h:345
imaps://...[host.com]...
Definition: prex.h:57
Convenience wrapper for the library headers.
#define STAILQ_FIRST(head)
Definition: queue.h:347
mailto:[me@example.com]?foo=bar
Definition: prex.h:63
Parse and identify different URL schemes.
int url_tobuffer(struct Url *url, struct Buffer *buf, int flags)
Output the URL string for a given Url object.
Definition: url.c:351
char * src
Raw URL string.
Definition: url.h:75
Url is file://.
Definition: url.h:35
#define U_PATH
Definition: url.h:48
Url is pops://.
Definition: url.h:37
struct Url * url_parse(const char *src)
Fill in Url.
Definition: url.c:232