NeoMutt  2022-04-29-249-gaae397
Teaching an old dog new tricks
DOXYGEN
url.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <ctype.h>
31#include <stdbool.h>
32#include <string.h>
33#include "mutt/lib.h"
34#include "url.h"
35#include "mime.h"
36
40static const struct Mapping UrlMap[] = {
41 { "file", U_FILE }, { "imap", U_IMAP }, { "imaps", U_IMAPS },
42 { "pop", U_POP }, { "pops", U_POPS }, { "news", U_NNTP },
43 { "nntp", U_NNTP }, { "snews", U_NNTPS }, { "nntps", U_NNTPS },
44 { "mailto", U_MAILTO }, { "notmuch", U_NOTMUCH }, { "smtp", U_SMTP },
45 { "smtps", U_SMTPS }, { NULL, U_UNKNOWN },
46};
47
55static bool parse_query_string(struct UrlQueryList *list, char *src)
56{
57 if (!src || (*src == '\0'))
58 return false;
59
60 bool again = true;
61 while (again)
62 {
63 regmatch_t *match = mutt_prex_capture(PREX_URL_QUERY_KEY_VAL, src);
64 if (!match)
65 return false;
66
67 regmatch_t *mkey = &match[PREX_URL_QUERY_KEY_VAL_MATCH_KEY];
68 regmatch_t *mval = &match[PREX_URL_QUERY_KEY_VAL_MATCH_VAL];
69
70 again = src[mutt_regmatch_end(mval)] != '\0';
71
72 char *key = src + mutt_regmatch_start(mkey);
73 char *val = src + mutt_regmatch_start(mval);
74 src[mutt_regmatch_end(mkey)] = '\0';
75 src[mutt_regmatch_end(mval)] = '\0';
76 if ((url_pct_decode(key) < 0) || (url_pct_decode(val) < 0))
77 return false;
78
79 struct UrlQuery *qs = mutt_mem_calloc(1, sizeof(struct UrlQuery));
80 qs->name = key;
81 qs->value = val;
82 STAILQ_INSERT_TAIL(list, qs, entries);
83
84 src += mutt_regmatch_end(mval) + again;
85 }
86
87 return true;
88}
89
96static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
97{
98 enum UrlScheme rc = U_UNKNOWN;
99 if (src && match)
100 {
102 if (rc == -1)
103 rc = U_UNKNOWN;
104 }
105 return rc;
106}
107
112static struct Url *url_new(void)
113{
114 struct Url *url = mutt_mem_calloc(1, sizeof(struct Url));
116 return url;
117}
118
123void url_free(struct Url **ptr)
124{
125 if (!ptr || !*ptr)
126 return;
127
128 struct Url *url = *ptr;
129
130 struct UrlQueryList *l = &url->query_strings;
131 while (!STAILQ_EMPTY(l))
132 {
133 struct UrlQuery *np = STAILQ_FIRST(l);
134 STAILQ_REMOVE_HEAD(l, entries);
135 // Don't free 'name', 'value': they are pointers into the 'src' string
136 FREE(&np);
137 }
138
139 FREE(&url->src);
140 FREE(ptr);
141}
142
/**
 * url_pct_encode - Percent-encode a string
 * @param buf    Buffer for the result
 * @param buflen Length of buffer, including room for the terminating NUL
 * @param src    String to encode (may be NULL, giving an empty result)
 *
 * The characters space, '/', ':', '&', '%' and '=' are written as "%XX"
 * (upper-case hex); everything else is copied unchanged.
 *
 * The output is always NUL-terminated and is silently truncated if it doesn't
 * fit; an escape sequence is never split.  If buf is NULL or buflen is 0,
 * nothing is written.
 */
void url_pct_encode(char *buf, size_t buflen, const char *src)
{
  static const char hex[] = "0123456789ABCDEF";

  /* A zero-length buffer cannot even hold the terminator */
  if (!buf || (buflen == 0))
    return;

  size_t room = buflen - 1; /* bytes available, excluding the terminator */

  for (; src && (*src != '\0') && (room != 0); src++)
  {
    const unsigned char ch = (unsigned char) *src;

    if (strchr(" /:&%=", ch))
    {
      if (room < 3)
        break; /* don't emit a partial escape sequence */

      *buf++ = '%';
      *buf++ = hex[ch >> 4];
      *buf++ = hex[ch & 0xf];
      room -= 3;
    }
    else
    {
      *buf++ = (char) ch;
      room--;
    }
  }

  *buf = '\0';
}
179
/**
 * url_pct_decode - Decode a percent-encoded string
 * @param s String to decode, rewritten in place
 * @retval  0 Success
 * @retval -1 s is NULL, or a '%' isn't followed by two hex digits
 *
 * On failure the string may have been partially rewritten.
 */
int url_pct_decode(char *s)
{
  if (!s)
    return -1;

  const char *in = s; /* read position */
  char *out = s;      /* write position; never ahead of 'in' */

  while (*in != '\0')
  {
    if (*in != '%')
    {
      *out++ = *in++;
      continue;
    }

    /* isxdigit() is tested before hexval() so hexval() only sees hex digits */
    const bool valid = (in[1] != '\0') && (in[2] != '\0') &&
                       isxdigit((unsigned char) in[1]) &&
                       isxdigit((unsigned char) in[2]) &&
                       (hexval(in[1]) >= 0) && (hexval(in[2]) >= 0);
    if (!valid)
      return -1;

    *out++ = (hexval(in[1]) << 4) | (hexval(in[2]));
    in += 3; /* the '%' and both digits */
  }

  *out = '\0';
  return 0;
}
215
221enum UrlScheme url_check_scheme(const char *str)
222{
223 return get_scheme(str, mutt_prex_capture(PREX_URL, str));
224}
225
234struct Url *url_parse(const char *src)
235{
236 const regmatch_t *match = mutt_prex_capture(PREX_URL, src);
237 if (!match)
238 return NULL;
239
240 enum UrlScheme scheme = get_scheme(src, match);
241 if (scheme == U_UNKNOWN)
242 return NULL;
243
244 const regmatch_t *userinfo = &match[PREX_URL_MATCH_USERINFO];
245 const regmatch_t *user = &match[PREX_URL_MATCH_USER];
246 const regmatch_t *pass = &match[PREX_URL_MATCH_PASS];
247 const regmatch_t *host = &match[PREX_URL_MATCH_HOSTNAME];
248 const regmatch_t *ipvx = &match[PREX_URL_MATCH_HOSTIPVX];
249 const regmatch_t *port = &match[PREX_URL_MATCH_PORT];
250 const regmatch_t *path = &match[PREX_URL_MATCH_PATH];
251 const regmatch_t *query = &match[PREX_URL_MATCH_QUERY];
252 const regmatch_t *pathonly = &match[PREX_URL_MATCH_PATH_ONLY];
253
254 struct Url *url = url_new();
255 url->scheme = scheme;
256 url->src = mutt_str_dup(src);
257
258 /* If the scheme is not followed by two forward slashes, then it's a simple
259 * path (see https://tools.ietf.org/html/rfc3986#section-3). */
260 if (mutt_regmatch_start(pathonly) != -1)
261 {
262 url->src[mutt_regmatch_end(pathonly)] = '\0';
263 url->path = url->src + mutt_regmatch_start(pathonly);
264 if (url_pct_decode(url->path) < 0)
265 goto err;
266 }
267
268 /* separate userinfo part */
269 if (mutt_regmatch_end(userinfo) != -1)
270 {
271 url->src[mutt_regmatch_end(userinfo) - 1] = '\0';
272 }
273
274 /* user */
275 if (mutt_regmatch_end(user) != -1)
276 {
277 url->src[mutt_regmatch_end(user)] = '\0';
278 url->user = url->src + mutt_regmatch_start(user);
279 if (url_pct_decode(url->user) < 0)
280 goto err;
281 }
282
283 /* pass */
284 if (mutt_regmatch_end(pass) != -1)
285 {
286 url->pass = url->src + mutt_regmatch_start(pass);
287 if (url_pct_decode(url->pass) < 0)
288 goto err;
289 }
290
291 /* host */
292 if (mutt_regmatch_len(host) != 0)
293 {
294 url->host = url->src + mutt_regmatch_start(host);
295 url->src[mutt_regmatch_end(host)] = '\0';
296 }
297 else if (mutt_regmatch_end(ipvx) != -1)
298 {
299 url->host = url->src + mutt_regmatch_start(ipvx) + 1; /* skip opening '[' */
300 url->src[mutt_regmatch_end(ipvx) - 1] = '\0'; /* skip closing ']' */
301 }
302
303 /* port */
304 if (mutt_regmatch_end(port) != -1)
305 {
306 url->src[mutt_regmatch_end(port)] = '\0';
307 const char *ports = url->src + mutt_regmatch_start(port);
308 unsigned short num;
309 if (!mutt_str_atous_full(ports, &num))
310 {
311 goto err;
312 }
313 url->port = num;
314 }
315
316 /* path */
317 if (mutt_regmatch_end(path) != -1)
318 {
319 url->src[mutt_regmatch_end(path)] = '\0';
320 url->path = url->src + mutt_regmatch_start(path);
321 if (!url->host)
322 {
323 /* If host is not provided, restore the '/': this is an absolute path */
324 *(--url->path) = '/';
325 }
326 if (url_pct_decode(url->path) < 0)
327 goto err;
328 }
329
330 /* query */
331 if (mutt_regmatch_end(query) != -1)
332 {
333 char *squery = url->src + mutt_regmatch_start(query);
334 if (!parse_query_string(&url->query_strings, squery))
335 goto err;
336 }
337
338 return url;
339
340err:
341 url_free(&url);
342 return NULL;
343}
344
353int url_tobuffer(struct Url *url, struct Buffer *buf, uint8_t flags)
354{
355 if (!url || !buf)
356 return -1;
357 if (url->scheme == U_UNKNOWN)
358 return -1;
359
361
362 if (url->host)
363 {
364 if (!(flags & U_PATH))
365 mutt_buffer_addstr(buf, "//");
366
367 if (url->user && (url->user[0] || !(flags & U_PATH)))
368 {
369 char str[256] = { 0 };
370 url_pct_encode(str, sizeof(str), url->user);
371 mutt_buffer_add_printf(buf, "%s@", str);
372 }
373
374 if (strchr(url->host, ':'))
375 mutt_buffer_add_printf(buf, "[%s]", url->host);
376 else
377 mutt_buffer_add_printf(buf, "%s", url->host);
378
379 if (url->port)
380 mutt_buffer_add_printf(buf, ":%hu/", url->port);
381 else
382 mutt_buffer_addstr(buf, "/");
383 }
384
385 if (url->path)
386 mutt_buffer_addstr(buf, url->path);
387
388 if (STAILQ_FIRST(&url->query_strings))
389 {
390 mutt_buffer_addstr(buf, "?");
391
392 char str[256] = { 0 };
393 struct UrlQuery *np = NULL;
394 STAILQ_FOREACH(np, &url->query_strings, entries)
395 {
396 url_pct_encode(str, sizeof(str), np->name);
397 mutt_buffer_addstr(buf, str);
398 mutt_buffer_addstr(buf, "=");
399 url_pct_encode(str, sizeof(str), np->value);
400 mutt_buffer_addstr(buf, str);
401 if (STAILQ_NEXT(np, entries))
402 mutt_buffer_addstr(buf, "&");
403 }
404 }
405
406 return 0;
407}
408
/**
 * url_tostring - Output the URL string for a given Url object
 * @param url   Url to turn into a string
 * @param dest  Buffer for the result
 * @param len   Length of dest
 * @param flags Flags, passed through to url_tobuffer()
 * @retval  0 Success
 * @retval -1 url or dest is NULL, or url_tobuffer() failed
 *
 * dest is only written when url_tobuffer() succeeds.
 */
int url_tostring(struct Url *url, char *dest, size_t len, uint8_t flags)
{
  if (!url || !dest)
    return -1;

  /* Build into a pooled Buffer, then copy out with a bounded string copy */
  struct Buffer *tmp = mutt_buffer_pool_get();

  const int rc = url_tobuffer(url, tmp, flags);
  if (rc == 0)
    mutt_str_copy(dest, mutt_buffer_string(tmp), len);

  mutt_buffer_pool_release(&tmp);
  return rc;
}
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:233
int mutt_buffer_add_printf(struct Buffer *buf, const char *fmt,...)
Format a string appending a Buffer.
Definition: buffer.c:211
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:168
static const char * mutt_buffer_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition: buffer.h:77
int mutt_map_get_value_n(const char *name, size_t len, const struct Mapping *map)
Lookup the constant for a string.
Definition: mapping.c:62
const char * mutt_map_get_name(int val, const struct Mapping *map)
Lookup a string for a constant.
Definition: mapping.c:42
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:43
Constants and macros for managing MIME encoding.
#define hexval(ch)
Definition: mime.h:80
Convenience wrapper for the library headers.
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:652
void mutt_buffer_pool_release(struct Buffer **pbuf)
Free a Buffer from the pool.
Definition: pool.c:112
struct Buffer * mutt_buffer_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:101
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:308
@ PREX_URL_QUERY_KEY_VAL_MATCH_VAL
key=[val]
Definition: prex.h:84
@ PREX_URL_QUERY_KEY_VAL_MATCH_KEY
[key]=val
Definition: prex.h:83
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
@ PREX_URL_QUERY_KEY_VAL
https://example.com/?[q=foo]
Definition: prex.h:34
@ PREX_URL_MATCH_USER
...//[user]:pass@...
Definition: prex.h:59
@ PREX_URL_MATCH_QUERY
...Inbox?[foo=bar&baz=value]
Definition: prex.h:71
@ PREX_URL_MATCH_HOSTNAME
imaps://...[host.com]...
Definition: prex.h:63
@ PREX_URL_MATCH_PORT
imaps://host.com:[993]/...
Definition: prex.h:66
@ PREX_URL_MATCH_PATH_ONLY
mailto:[me@example.com]?foo=bar
Definition: prex.h:69
@ PREX_URL_MATCH_SCHEME
[imaps]://...
Definition: prex.h:54
@ PREX_URL_MATCH_USERINFO
...//[user:pass@]...
Definition: prex.h:58
@ PREX_URL_MATCH_PATH
...:993/[Inbox]
Definition: prex.h:68
@ PREX_URL_MATCH_HOSTIPVX
imaps://...[127.0.0.1]...
Definition: prex.h:64
@ PREX_URL_MATCH_PASS
...//user:[pass]@...
Definition: prex.h:61
#define STAILQ_REMOVE_HEAD(head, field)
Definition: queue.h:422
#define STAILQ_INIT(head)
Definition: queue.h:372
#define STAILQ_FIRST(head)
Definition: queue.h:350
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:389
#define STAILQ_EMPTY(head)
Definition: queue.h:348
#define STAILQ_NEXT(elm, field)
Definition: queue.h:400
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:70
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
String manipulation buffer.
Definition: buffer.h:34
Mapping between user-readable string and a constant.
Definition: mapping.h:32
Parsed Query String.
Definition: url.h:58
char * name
Query name.
Definition: url.h:59
char * value
Query value.
Definition: url.h:60
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:69
char * user
Username.
Definition: url.h:71
unsigned short port
Port.
Definition: url.h:74
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:76
char * host
Host.
Definition: url.h:73
char * src
Raw URL string.
Definition: url.h:77
char * pass
Password.
Definition: url.h:72
char * path
Path.
Definition: url.h:75
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:70
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:189
struct Url * url_parse(const char *src)
Parse a URL string into a newly allocated Url.
Definition: url.c:234
int url_tostring(struct Url *url, char *dest, size_t len, uint8_t flags)
Output the URL string for a given Url object.
Definition: url.c:418
int url_tobuffer(struct Url *url, struct Buffer *buf, uint8_t flags)
Output the URL string for a given Url object.
Definition: url.c:353
static bool parse_query_string(struct UrlQueryList *list, char *src)
Parse a URL query string.
Definition: url.c:55
void url_free(struct Url **ptr)
Free the contents of a URL.
Definition: url.c:123
enum UrlScheme url_check_scheme(const char *str)
Check the protocol of a URL.
Definition: url.c:221
static const struct Mapping UrlMap[]
Constants for URL protocols.
Definition: url.c:40
void url_pct_encode(char *buf, size_t buflen, const char *src)
Percent-encode a string.
Definition: url.c:151
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:96
static struct Url * url_new(void)
Create a Url.
Definition: url.c:112
Parse and identify different URL schemes.
UrlScheme
All recognised Url types.
Definition: url.h:34
@ U_NOTMUCH
Url is notmuch://.
Definition: url.h:46
@ U_UNKNOWN
Url wasn't recognised.
Definition: url.h:35
@ U_FILE
Url is file://.
Definition: url.h:36
@ U_NNTPS
Url is nntps://.
Definition: url.h:42
@ U_MAILTO
Url is mailto: (no "//" after the scheme).
Definition: url.h:45
@ U_SMTPS
Url is smtps://.
Definition: url.h:44
@ U_SMTP
Url is smtp://.
Definition: url.h:43
@ U_NNTP
Url is nntp://.
Definition: url.h:41
@ U_IMAP
Url is imap://.
Definition: url.h:39
@ U_POPS
Url is pops://.
Definition: url.h:38
@ U_IMAPS
Url is imaps://.
Definition: url.h:40
@ U_POP
Url is pop://.
Definition: url.h:37
#define U_PATH
Definition: url.h:50