NeoMutt  2023-03-22-27-g3cb248
Teaching an old dog new tricks
DOXYGEN
url.h File Reference

Parse and identify different URL schemes. More...

#include <stddef.h>
#include <stdint.h>
#include "mutt/lib.h"
+ Include dependency graph for url.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  UrlQuery
 Parsed Query String. More...
 
struct  Url
 A parsed URL proto://user:password@host:port/path?a=1&b=2 More...
 

Macros

#define U_NO_FLAGS   0
 
#define U_PATH   (1 << 1)
 

Enumerations

enum  UrlScheme {
  U_UNKNOWN , U_FILE , U_POP , U_POPS ,
  U_IMAP , U_IMAPS , U_NNTP , U_NNTPS ,
  U_SMTP , U_SMTPS , U_MAILTO , U_NOTMUCH
}
 All recognised Url types. More...
 

Functions

 STAILQ_HEAD (UrlQueryList, UrlQuery)
 
enum UrlScheme url_check_scheme (const char *s)
 Check the protocol of a URL. More...
 
void url_free (struct Url **ptr)
 Free the contents of a URL. More...
 
struct Url * url_parse (const char *src)
 Fill in Url. More...
 
int url_pct_decode (char *s)
 Decode a percent-encoded string. More...
 
void url_pct_encode (char *buf, size_t buflen, const char *src)
 Percent-encode a string. More...
 
int url_tobuffer (struct Url *url, struct Buffer *dest, uint8_t flags)
 Output the URL string for a given Url object. More...
 
int url_tostring (struct Url *url, char *buf, size_t buflen, uint8_t flags)
 Output the URL string for a given Url object. More...
 

Detailed Description

Parse and identify different URL schemes.

Authors
  • Thomas Roessler

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file url.h.

Macro Definition Documentation

◆ U_NO_FLAGS

#define U_NO_FLAGS   0

Definition at line 49 of file url.h.

◆ U_PATH

#define U_PATH   (1 << 1)

Definition at line 50 of file url.h.

Enumeration Type Documentation

◆ UrlScheme

enum UrlScheme

All recognised Url types.

Enumerator
U_UNKNOWN 

Url wasn't recognised.

U_FILE 

Url is file://.

U_POP 

Url is pop://.

U_POPS 

Url is pops://.

U_IMAP 

Url is imap://.

U_IMAPS 

Url is imaps://.

U_NNTP 

Url is nntp://.

U_NNTPS 

Url is nntps://.

U_SMTP 

Url is smtp://.

U_SMTPS 

Url is smtps://.

U_MAILTO 

Url is mailto://.

U_NOTMUCH 

Url is notmuch://.

Definition at line 33 of file url.h.

34{
35 U_UNKNOWN,
36 U_FILE,
37 U_POP,
38 U_POPS,
39 U_IMAP,
40 U_IMAPS,
41 U_NNTP,
42 U_NNTPS,
43 U_SMTP,
44 U_SMTPS,
45 U_MAILTO,
46 U_NOTMUCH,
47};
@ U_NOTMUCH
Url is notmuch://.
Definition: url.h:46
@ U_UNKNOWN
Url wasn't recognised.
Definition: url.h:35
@ U_FILE
Url is file://.
Definition: url.h:36
@ U_NNTPS
Url is nntps://.
Definition: url.h:42
@ U_MAILTO
Url is mailto://.
Definition: url.h:45
@ U_SMTPS
Url is smtps://.
Definition: url.h:44
@ U_SMTP
Url is smtp://.
Definition: url.h:43
@ U_NNTP
Url is nntp://.
Definition: url.h:41
@ U_IMAP
Url is imap://.
Definition: url.h:39
@ U_POPS
Url is pops://.
Definition: url.h:38
@ U_IMAPS
Url is imaps://.
Definition: url.h:40
@ U_POP
Url is pop://.
Definition: url.h:37

Function Documentation

◆ STAILQ_HEAD()

STAILQ_HEAD ( UrlQueryList  ,
UrlQuery   
)

◆ url_check_scheme()

enum UrlScheme url_check_scheme ( const char *  str)

Check the protocol of a URL.

Parameters
str  String to check
Return values
num  Url type, e.g. U_IMAPS

Definition at line 223 of file url.c.

224{
225 return get_scheme(str, mutt_prex_capture(PREX_URL, str));
226}
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:308
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition: prex.h:33
static enum UrlScheme get_scheme(const char *src, const regmatch_t *match)
Extract the scheme part from a matched URL.
Definition: url.c:96
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_free()

void url_free ( struct Url **  ptr)

Free the contents of a URL.

Parameters
ptr  Url to free

Definition at line 123 of file url.c.

124{
125 if (!ptr || !*ptr)
126 return;
127
128 struct Url *url = *ptr;
129
130 struct UrlQueryList *l = &url->query_strings;
131 while (!STAILQ_EMPTY(l))
132 {
133 struct UrlQuery *np = STAILQ_FIRST(l);
134 STAILQ_REMOVE_HEAD(l, entries);
135 // Don't free 'name', 'value': they are pointers into the 'src' string
136 FREE(&np);
137 }
138
139 FREE(&url->src);
140 FREE(ptr);
141}
#define FREE(x)
Definition: memory.h:43
#define STAILQ_REMOVE_HEAD(head, field)
Definition: queue.h:422
#define STAILQ_FIRST(head)
Definition: queue.h:350
#define STAILQ_EMPTY(head)
Definition: queue.h:348
Parsed Query String.
Definition: url.h:58
A parsed URL proto://user:password@host:port/path?a=1&b=2
Definition: url.h:69
struct UrlQueryList query_strings
List of query strings.
Definition: url.h:76
char * src
Raw URL string.
Definition: url.h:77
+ Here is the caller graph for this function:

◆ url_parse()

struct Url * url_parse ( const char *  src)

Fill in Url.

Parameters
src  String to parse
Return values
ptr  Parsed URL
NULL  Error
Note
Caller must free returned Url with url_free()

Definition at line 236 of file url.c.

237{
238 const regmatch_t *match = mutt_prex_capture(PREX_URL, src);
239 if (!match)
240 return NULL;
241
242 enum UrlScheme scheme = get_scheme(src, match);
243 if (scheme == U_UNKNOWN)
244 return NULL;
245
246 const regmatch_t *userinfo = &match[PREX_URL_MATCH_USERINFO];
247 const regmatch_t *user = &match[PREX_URL_MATCH_USER];
248 const regmatch_t *pass = &match[PREX_URL_MATCH_PASS];
249 const regmatch_t *host = &match[PREX_URL_MATCH_HOSTNAME];
250 const regmatch_t *ipvx = &match[PREX_URL_MATCH_HOSTIPVX];
251 const regmatch_t *port = &match[PREX_URL_MATCH_PORT];
252 const regmatch_t *path = &match[PREX_URL_MATCH_PATH];
253 const regmatch_t *query = &match[PREX_URL_MATCH_QUERY];
254 const regmatch_t *pathonly = &match[PREX_URL_MATCH_PATH_ONLY];
255
256 struct Url *url = url_new();
257 url->scheme = scheme;
258 url->src = mutt_str_dup(src);
259
260 /* If the scheme is not followed by two forward slashes, then it's a simple
261 * path (see https://tools.ietf.org/html/rfc3986#section-3). */
262 if (mutt_regmatch_start(pathonly) != -1)
263 {
264 url->src[mutt_regmatch_end(pathonly)] = '\0';
265 url->path = url->src + mutt_regmatch_start(pathonly);
266 if (url_pct_decode(url->path) < 0)
267 goto err;
268 }
269
270 /* separate userinfo part */
271 if (mutt_regmatch_end(userinfo) != -1)
272 {
273 url->src[mutt_regmatch_end(userinfo) - 1] = '\0';
274 }
275
276 /* user */
277 if (mutt_regmatch_end(user) != -1)
278 {
279 url->src[mutt_regmatch_end(user)] = '\0';
280 url->user = url->src + mutt_regmatch_start(user);
281 if (url_pct_decode(url->user) < 0)
282 goto err;
283 }
284
285 /* pass */
286 if (mutt_regmatch_end(pass) != -1)
287 {
288 url->pass = url->src + mutt_regmatch_start(pass);
289 if (url_pct_decode(url->pass) < 0)
290 goto err;
291 }
292
293 /* host */
294 if (mutt_regmatch_len(host) != 0)
295 {
296 url->host = url->src + mutt_regmatch_start(host);
297 url->src[mutt_regmatch_end(host)] = '\0';
298 }
299 else if (mutt_regmatch_end(ipvx) != -1)
300 {
301 url->host = url->src + mutt_regmatch_start(ipvx) + 1; /* skip opening '[' */
302 url->src[mutt_regmatch_end(ipvx) - 1] = '\0'; /* skip closing ']' */
303 }
304
305 /* port */
306 if (mutt_regmatch_end(port) != -1)
307 {
308 url->src[mutt_regmatch_end(port)] = '\0';
309 const char *ports = url->src + mutt_regmatch_start(port);
310 unsigned short num;
311 if (!mutt_str_atous_full(ports, &num))
312 {
313 goto err;
314 }
315 url->port = num;
316 }
317
318 /* path */
319 if (mutt_regmatch_end(path) != -1)
320 {
321 url->src[mutt_regmatch_end(path)] = '\0';
322 url->path = url->src + mutt_regmatch_start(path);
323 if (!url->host)
324 {
325 /* If host is not provided, restore the '/': this is an absolute path */
326 *(--url->path) = '/';
327 }
328 if (url_pct_decode(url->path) < 0)
329 goto err;
330 }
331
332 /* query */
333 if (mutt_regmatch_end(query) != -1)
334 {
335 char *squery = url->src + mutt_regmatch_start(query);
336 if (!parse_query_string(&url->query_strings, squery))
337 goto err;
338 }
339
340 return url;
341
342err:
343 url_free(&url);
344 return NULL;
345}
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
@ PREX_URL_MATCH_USER
...//[user]:pass@...
Definition: prex.h:59
@ PREX_URL_MATCH_QUERY
...Inbox?[foo=bar&baz=value]
Definition: prex.h:71
@ PREX_URL_MATCH_HOSTNAME
imaps://...[host.com]...
Definition: prex.h:63
@ PREX_URL_MATCH_PORT
imaps://host.com:[993]/...
Definition: prex.h:66
@ PREX_URL_MATCH_PATH_ONLY
mailto:[me@example.com]?foo=bar
Definition: prex.h:69
@ PREX_URL_MATCH_USERINFO
...//[user:pass@]...
Definition: prex.h:58
@ PREX_URL_MATCH_PATH
...:993/[Inbox]
Definition: prex.h:68
@ PREX_URL_MATCH_HOSTIPVX
imaps://...[127.0.0.1]...
Definition: prex.h:64
@ PREX_URL_MATCH_PASS
...//user:[pass]@...
Definition: prex.h:61
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
static regoff_t mutt_regmatch_end(const regmatch_t *match)
Return the end of a match.
Definition: regex3.h:70
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
char * user
Username.
Definition: url.h:71
unsigned short port
Port.
Definition: url.h:74
char * host
Host.
Definition: url.h:73
char * pass
Password.
Definition: url.h:72
char * path
Path.
Definition: url.h:75
enum UrlScheme scheme
Scheme, e.g. U_SMTPS.
Definition: url.h:70
int url_pct_decode(char *s)
Decode a percent-encoded string.
Definition: url.c:189
static bool parse_query_string(struct UrlQueryList *list, char *src)
Parse a URL query string.
Definition: url.c:55
void url_free(struct Url **ptr)
Free the contents of a URL.
Definition: url.c:123
static struct Url * url_new(void)
Create a Url.
Definition: url.c:112
UrlScheme
All recognised Url types.
Definition: url.h:34
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_pct_decode()

int url_pct_decode ( char *  s)

Decode a percent-encoded string.

Parameters
s  String to decode
Return values
0  Success
-1  Error

e.g. turn "hello%20world" into "hello world". The string is decoded in-place.

Definition at line 189 of file url.c.

190{
191 if (!s)
192 return -1;
193
194 char *d = NULL;
195
196 for (d = s; *s; s++)
197 {
198 if (*s == '%')
199 {
200 if ((s[1] != '\0') && (s[2] != '\0') && isxdigit((unsigned char) s[1]) &&
201 isxdigit((unsigned char) s[2]) && (hexval(s[1]) >= 0) && (hexval(s[2]) >= 0))
202 {
203 *d++ = (hexval(s[1]) << 4) | (hexval(s[2]));
204 s += 2;
205 }
206 else
207 {
208 return -1;
209 }
210 }
211 else
212 *d++ = *s;
213 }
214 *d = '\0';
215 return 0;
216}
#define hexval(ch)
Definition: mime.h:80
+ Here is the caller graph for this function:

◆ url_pct_encode()

void url_pct_encode ( char *  buf,
size_t  buflen,
const char *  src 
)

Percent-encode a string.

Parameters
buf  Buffer for the result
buflen  Length of buffer
src  String to encode

e.g. turn "hello world" into "hello%20world"

Definition at line 151 of file url.c.

152{
153 static const char *hex = "0123456789ABCDEF";
154
155 if (!buf)
156 return;
157
158 *buf = '\0';
159 buflen--;
160 while (src && *src && (buflen != 0))
161 {
162 if (strchr(" /:&%=", *src))
163 {
164 if (buflen < 3)
165 break;
166
167 *buf++ = '%';
168 *buf++ = hex[(*src >> 4) & 0xf];
169 *buf++ = hex[*src & 0xf];
170 src++;
171 buflen -= 3;
172 continue;
173 }
174 *buf++ = *src++;
175 buflen--;
176 }
177 *buf = '\0';
178}
+ Here is the caller graph for this function:

◆ url_tobuffer()

int url_tobuffer ( struct Url *  url,
struct Buffer *  buf,
uint8_t  flags 
)

Output the URL string for a given Url object.

Parameters
url  Url to turn into a string
buf  Buffer for the result
flags  Flags, e.g. U_PATH
Return values
0  Success
-1  Error

Definition at line 355 of file url.c.

356{
357 if (!url || !buf)
358 return -1;
359 if (url->scheme == U_UNKNOWN)
360 return -1;
361
362 mutt_buffer_printf(buf, "%s:", mutt_map_get_name(url->scheme, UrlMap));
363
364 if (url->host)
365 {
366 if (!(flags & U_PATH))
367 mutt_buffer_addstr(buf, "//");
368
369 if (url->user && (url->user[0] || !(flags & U_PATH)))
370 {
371 char str[256] = { 0 };
372 url_pct_encode(str, sizeof(str), url->user);
373 mutt_buffer_add_printf(buf, "%s@", str);
374 }
375
376 if (strchr(url->host, ':'))
377 mutt_buffer_add_printf(buf, "[%s]", url->host);
378 else
379 mutt_buffer_add_printf(buf, "%s", url->host);
380
381 if (url->port)
382 mutt_buffer_add_printf(buf, ":%hu/", url->port);
383 else
384 mutt_buffer_addstr(buf, "/");
385 }
386
387 if (url->path)
388 mutt_buffer_addstr(buf, url->path);
389
390 if (STAILQ_FIRST(&url->query_strings))
391 {
392 mutt_buffer_addstr(buf, "?");
393
394 char str[256] = { 0 };
395 struct UrlQuery *np = NULL;
396 STAILQ_FOREACH(np, &url->query_strings, entries)
397 {
398 url_pct_encode(str, sizeof(str), np->name);
399 mutt_buffer_addstr(buf, str);
400 mutt_buffer_addstr(buf, "=");
401 url_pct_encode(str, sizeof(str), np->value);
402 mutt_buffer_addstr(buf, str);
403 if (STAILQ_NEXT(np, entries))
404 mutt_buffer_addstr(buf, "&");
405 }
406 }
407
408 return 0;
409}
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:233
int mutt_buffer_add_printf(struct Buffer *buf, const char *fmt,...)
Format a string appending a Buffer.
Definition: buffer.c:211
int mutt_buffer_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition: buffer.c:168
const char * mutt_map_get_name(int val, const struct Mapping *map)
Lookup a string for a constant.
Definition: mapping.c:42
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define STAILQ_NEXT(elm, field)
Definition: queue.h:400
char * name
Query name.
Definition: url.h:59
char * value
Query value.
Definition: url.h:60
static const struct Mapping UrlMap[]
Constants for URL protocols.
Definition: url.c:40
void url_pct_encode(char *buf, size_t buflen, const char *src)
Percent-encode a string.
Definition: url.c:151
#define U_PATH
Definition: url.h:50
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ url_tostring()

int url_tostring ( struct Url *  url,
char *  dest,
size_t  len,
uint8_t  flags 
)

Output the URL string for a given Url object.

Parameters
url  Url to turn into a string
dest  Buffer for the result
len  Length of buffer
flags  Flags, e.g. U_PATH
Return values
0  Success
-1  Error

Definition at line 420 of file url.c.

421{
422 if (!url || !dest)
423 return -1;
424
425 struct Buffer *dest_buf = mutt_buffer_pool_get();
426
427 int retval = url_tobuffer(url, dest_buf, flags);
428 if (retval == 0)
429 mutt_str_copy(dest, mutt_buffer_string(dest_buf), len);
430
431 mutt_buffer_pool_release(&dest_buf);
432
433 return retval;
434}
static const char * mutt_buffer_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition: buffer.h:78
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:652
void mutt_buffer_pool_release(struct Buffer **pbuf)
Free a Buffer from the pool.
Definition: pool.c:112
struct Buffer * mutt_buffer_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:101
String manipulation buffer.
Definition: buffer.h:34
int url_tobuffer(struct Url *url, struct Buffer *buf, uint8_t flags)
Output the URL string for a given Url object.
Definition: url.c:355
+ Here is the call graph for this function:
+ Here is the caller graph for this function: