NeoMutt
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -.
 
static size_t q_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -.
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements.
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text.
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder.
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted.
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering.
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string.
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string.
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string.
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields.
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list.
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list.
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope.
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope.
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)

Definition at line 64 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char * parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in]   str         String to parse
[out]  enc         Content encoding found in the first RFC2047 word
[out]  charset     Charset found in the first RFC2047 word
[out]  charsetlen  Length of the charset string found
[out]  text        Start of the first RFC2047 encoded text
[out]  textlen     Length of the encoded text found
Return values
ptr   Start of the RFC2047 encoded word
NULL  None was found

Definition at line 147 of file rfc2047.c.

149{
150 regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
151 if (!match)
152 return NULL;
153
154 const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
155 const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
156 const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
157 const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
158
159 /* Charset */
160 *charset = str + mutt_regmatch_start(mcharset);
161 *charsetlen = mutt_regmatch_len(mcharset);
162
163 /* Encoding: either Q or B */
164 *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
165
166 *text = str + mutt_regmatch_start(mtext);
167 *textlen = mutt_regmatch_len(mtext);
168 return str + mutt_regmatch_start(mfull);
169}
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:289
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:96
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:95
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:94
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d         String to convert
dlen      Length of string
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function (set on success)
wlen      Length of the encoded word (set on success)
Return values
0   Success, *encoder and *wlen have been set
>0  Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 190 of file rfc2047.c.

192{
193 char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
194 const char *ib = NULL;
195 char *ob = NULL;
196 size_t ibl, obl;
197 int count, len, len_b, len_q;
198
199 if (fromcode)
200 {
201 iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
202 assert(iconv_t_valid(cd));
203 ib = d;
204 ibl = dlen;
205 ob = buf;
206 obl = sizeof(buf) - strlen(tocode);
207 if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) ||
208 (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ))
209 {
210 assert(errno == E2BIG);
211 assert(ib > d);
212 return ((ib - d) == dlen) ? dlen : ib - d + 1;
213 }
214 }
215 else
216 {
217 if (dlen > (sizeof(buf) - strlen(tocode)))
218 return sizeof(buf) - strlen(tocode) + 1;
219 memcpy(buf, d, dlen);
220 ob = buf + dlen;
221 }
222
223 count = 0;
224 for (char *p = buf; p < ob; p++)
225 {
226 unsigned char c = *p;
227 assert(strchr(MimeSpecials, '?'));
228 if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
229 ((c != ' ') && strchr(MimeSpecials, *p)))
230 {
231 count++;
232 }
233 }
234
235 len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
236 len_b = len + (((ob - buf) + 2) / 3) * 4;
237 len_q = len + (ob - buf) + 2 * count;
238
239 /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
240 if (mutt_istr_equal(tocode, "ISO-2022-JP"))
241 len_q = ENCWORD_LEN_MAX + 1;
242
243 if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
244 {
245 *encoder = b_encoder;
246 *wlen = len_b;
247 return 0;
248 }
249 else if (len_q <= ENCWORD_LEN_MAX)
250 {
251 *encoder = q_encoder;
252 *wlen = len_q;
253 return 0;
254 }
255 else
256 {
257 return dlen;
258 }
259}
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition: rfc2047.c:69
static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition: rfc2047.c:102
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:589
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:103
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition: charset.h:112
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:810
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str       Buffer for the result (the encoded word)
buf       Data to encode
buflen    Length of the data to encode
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function
Return values
num  Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 273 of file rfc2047.c.

275{
276 if (!fromcode)
277 {
278 return (*encoder)(str, buf, buflen, tocode);
279 }
280
281 const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
282 assert(iconv_t_valid(cd));
283 const char *ib = buf;
284 size_t ibl = buflen;
285 char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
286 char *ob = tmp;
287 size_t obl = sizeof(tmp) - strlen(tocode);
288 const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
289 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
290 assert((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ));
291 return (*encoder)(str, tmp, ob - tmp, tocode);
292}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d         String to convert
dlen      Length of string
col       Starting column to convert
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function (set by this function)
wlen      Length of the encoded word (set by this function)
Return values
num  Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 310 of file rfc2047.c.

312{
313 const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
314
315 size_t n = dlen;
316 while (true)
317 {
318 assert(n > 0);
319 const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
320 if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
321 break;
322 n = ((nn != 0) ? nn : n) - 1;
323 assert(n > 0);
324 if (utf8)
325 while ((n > 1) && CONTINUATION_BYTE(d[n]))
326 n--;
327 }
328 return n;
329}
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:190
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out]  res         Buffer where the resulting string is appended
[in]   buf         Buffer with the input string
[in]   charset     Charset to use for the conversion
[in]   charsetlen  Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 340 of file rfc2047.c.

341{
342 if (!charset)
343 return;
344 char end = charset[charsetlen];
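345 charset[charsetlen] = '\0';
346 mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM);
347 charset[charsetlen] = end;
348 mutt_mb_filter_unprintable(&buf->data);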
349 buf_addstr(res, buf->data);
350 FREE(&buf->data);
351 buf_init(buf);
352}
struct Buffer * buf_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:55
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:238
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:115
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:419
#define FREE(x)
Definition: memory.h:45
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:826
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
char * data
Pointer to data.
Definition: buffer.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char * decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s    String to decode
len  Length of the string
enc  Encoding type
Return values
ptr   Decoded string
NULL  Decoding failed (invalid Base64 input)
Note
The input string must be null-terminated; the len parameter is an optimization. The caller must free the returned string.

Definition at line 364 of file rfc2047.c.

365{
366 const char *it = s;
367 const char *end = s + len;
368
369 assert(*end == '\0');
370
371 if (enc == ENC_QUOTED_PRINTABLE)
372 {
373 struct Buffer buf = buf_make(0);
374 for (; it < end; it++)
375 {
376 if (*it == '_')
377 {
378 buf_addch(&buf, ' ');
379 }
380 else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
381 (!(it[2] & ~127) && (hexval(it[2]) != -1)))
382 {
383 buf_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
384 it += 2;
385 }
386 else
387 {
388 buf_addch(&buf, *it);
389 }
390 }
391 buf_addch(&buf, '\0');
392 return buf.data;
393 }
394 else if (enc == ENC_BASE64)
395 {
396 const int olen = 3 * len / 4 + 1;
397 char *out = mutt_mem_malloc(olen);
398 int dlen = mutt_b64_decode(it, out, olen);
399 if (dlen == -1)
400 {
401 FREE(&out);
402 return NULL;
403 }
404 out[dlen] = '\0';
405 return out;
406 }
407
408 assert(0); /* The enc parameter has an invalid value */
409 return NULL;
410}
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
struct Buffer buf_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:70
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:253
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
#define hexval(ch)
Definition: mime.h:80
String manipulation buffer.
Definition: buffer.h:34
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const struct Slist *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in]   d         String to convert
[in]   dlen      Length of string
[in]   col       Starting column to convert
[in]   fromcode  Original encoding
[in]   charsets  List of allowable character sets
[out]  e         Encoded string
[out]  elen      Length of encoded string
[in]   specials  Special characters to be encoded
Return values
0  Success
1  The string could not be converted from fromcode to UTF-8
2  No target charset could be chosen from charsets

Definition at line 424 of file rfc2047.c.

426{
427 int rc = 0;
428 char *buf = NULL;
429 size_t bufpos, buflen;
430 char *t0 = NULL, *t1 = NULL, *t = NULL;
431 char *s0 = NULL, *s1 = NULL;
432 size_t ulen, r, wlen = 0;
433 encoder_t encoder = NULL;
434 char *tocode1 = NULL;
435 const char *tocode = NULL;
436 const char *icode = "utf-8";
437
438 /* Try to convert to UTF-8. */
439 char *u = mutt_strn_dup(d, dlen);
440 if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
441 {
442 rc = 1;
443 icode = 0;
444 }
445 ulen = mutt_str_len(u);
446
447 /* Find earliest and latest things we must encode. */
448 s0 = 0;
449 s1 = 0;
450 t0 = 0;
451 t1 = 0;
452 for (t = u; t < (u + ulen); t++)
453 {
454 if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
455 {
456 if (!t0)
457 t0 = t;
458 t1 = t;
459 }
460 else if (specials && *t && strchr(specials, *t))
461 {
462 if (!s0)
463 s0 = t;
464 s1 = t;
465 }
466 }
467
468 /* If we have something to encode, include RFC822 specials */
469 if (t0 && s0 && (s0 < t0))
470 t0 = s0;
471 if (t1 && s1 && (s1 > t1))
472 t1 = s1;
473
474 if (!t0)
475 {
476 /* No encoding is required. */
477 *e = u;
478 *elen = ulen;
479 return rc;
480 }
481
482 /* Choose target charset. */
483 tocode = fromcode;
484 if (icode)
485 {
486 tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
487 if (tocode1)
488 {
489 tocode = tocode1;
490 }
491 else
492 {
493 rc = 2;
494 icode = 0;
495 }
496 }
497
498 /* Hack to avoid labelling 8-bit data as us-ascii. */
499 if (!icode && mutt_ch_is_us_ascii(tocode))
500 tocode = "unknown-8bit";
501
502 /* Adjust t0 for maximum length of line. */
503 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
504 if (t < u)
505 t = u;
506 if (t < t0)
507 t0 = t;
508
509 /* Adjust t0 until we can encode a character after a space. */
510 for (; t0 > u; t0--)
511 {
512 if (!HSPACE(*(t0 - 1)))
513 continue;
514 t = t0 + 1;
515 if (icode)
516 while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
517 t++;
518 if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
519 ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
520 {
521 break;
522 }
523 }
524
525 /* Adjust t1 until we can encode a character before a space. */
526 for (; t1 < (u + ulen); t1++)
527 {
528 if (!HSPACE(*t1))
529 continue;
530 t = t1 - 1;
531 if (icode)
532 while (CONTINUATION_BYTE(*t))
533 t--;
534 if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
535 ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
536 {
537 break;
538 }
539 }
540
541 /* We shall encode the region [t0,t1). */
542
543 /* Initialise the output buffer with the us-ascii prefix. */
544 buflen = 2 * ulen;
545 buf = mutt_mem_malloc(buflen);
546 bufpos = t0 - u;
547 memcpy(buf, u, t0 - u);
548
549 col += t0 - u;
550
551 t = t0;
552 while (true)
553 {
554 /* Find how much we can encode. */
555 size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
556 if (n == (t1 - t))
557 {
558 /* See if we can fit the us-ascii suffix, too. */
559 if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
560 break;
561 n = t1 - t - 1;
562 if (icode)
563 while (CONTINUATION_BYTE(t[n]))
564 n--;
565 if (n == 0)
566 {
567 /* This should only happen in the really stupid case where the
568 * only word that needs encoding is one character long, but
569 * there is too much us-ascii stuff after it to use a single
570 * encoded word. We add the next word to the encoded region
571 * and try again. */
572 assert(t1 < (u + ulen));
573 for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
574 ; // do nothing
575
576 continue;
577 }
578 n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
579 }
580
581 /* Add to output buffer. */
582 const char *line_break = "\n\t";
583 const int lb_len = 2; /* strlen(line_break) */
584
585 if ((bufpos + wlen + lb_len) > buflen)
586 {
587 buflen = bufpos + wlen + lb_len;
588 mutt_mem_realloc(&buf, buflen);
589 }
590 r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
591 assert(r == wlen);
592 bufpos += wlen;
593 memcpy(buf + bufpos, line_break, lb_len);
594 bufpos += lb_len;
595
596 col = 1;
597
598 t += n;
599 }
600
601 /* Add last encoded word and us-ascii suffix to buffer. */
602 buflen = bufpos + wlen + (u + ulen - t1);
603 mutt_mem_realloc(&buf, buflen + 1);
604 r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
605 assert(r == wlen);
606 bufpos += wlen;
607 memcpy(buf + bufpos, t1, u + ulen - t1);
608
609 FREE(&tocode1);
610 FREE(&u);
611
612 buf[buflen] = '\0';
613
614 *e = buf;
615 *elen = buflen + 1;
616 return rc;
617}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1106
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:97
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:452
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:568
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:310
size_t(* encoder_t)(char *res, const char *buf, size_t buflen, const char *tocode)
Definition: rfc2047.c:64
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:273
#define HSPACE(ch)
Definition: rfc2047.c:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const struct Slist *  charsets
)

RFC-2047-encode a string.

Parameters
[in,out]  pd        String to be encoded, and resulting encoded string
[in]      specials  Special characters to be encoded
[in]      col       Starting column (used for the line-length calculation)
[in]      charsets  List of charsets to choose from

Definition at line 626 of file rfc2047.c.

627{
628 if (!pd || !*pd)
629 return;
630
631 const char *const c_charset = cc_charset();
632 if (!c_charset)
633 return;
634
635 struct Slist *fallback = NULL;
636 if (!charsets)
637 {
638 fallback = slist_parse("utf-8", SLIST_SEP_COLON);
639 charsets = fallback;
640 }
641
642 char *e = NULL;
643 size_t elen = 0;
644 encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
645
646 slist_free(&fallback);
647 FREE(pd);
648 *pd = e;
649}
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:215
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:162
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:424
#define SLIST_SEP_COLON
Definition: slist.h:35
String list.
Definition: slist.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out]  pd  String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 659 of file rfc2047.c.

660{
661 if (!pd || !*pd)
662 return;
663
664 struct Buffer buf = buf_make(0); // Output buffer
665 char *s = *pd; // Read pointer
666 char *beg = NULL; // Begin of encoded word
667 enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE
668 char *charset = NULL; // Which charset
669 size_t charsetlen; // Length of the charset
670 char *text = NULL; // Encoded text
671 size_t textlen = 0; // Length of encoded text
672
673 /* Keep some state in case the next decoded word is using the same charset
674 * and it happens to be split in the middle of a multibyte character.
675 * See https://github.com/neomutt/neomutt/issues/1015 */
676 struct Buffer prev = buf_make(0); /* Previously decoded word */
677 char *prev_charset = NULL; /* Previously used charset */
678 size_t prev_charsetlen = 0; /* Length of the previously used charset */
679
680 const struct Slist *c_assumed_charset = cc_assumed_charset();
681 const char *c_charset = cc_charset();
682 while (*s)
683 {
684 beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
685 if (beg != s)
686 {
687 /* Some non-encoded text was found */
688 size_t holelen = beg ? beg - s : mutt_str_len(s);
689
690 /* Ignore whitespace between encoded words */
691 if (beg && (mutt_str_lws_len(s, holelen) == holelen))
692 {
693 s = beg;
694 continue;
695 }
696
697 /* If we have some previously decoded text, add it now */
698 if (!buf_is_empty(&prev))
699 {
700 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
701 }
702
703 /* Add non-encoded part */
704 if (slist_is_empty(c_assumed_charset))
705 {
706 buf_addstr_n(&buf, s, holelen);
707 }
708 else
709 {
710 char *conv = mutt_strn_dup(s, holelen);
711 mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
712 buf_addstr(&buf, conv);
713 FREE(&conv);
714 }
715 s += holelen;
716 }
717 if (beg)
718 {
719 /* Some encoded text was found */
720 text[textlen] = '\0';
721 char *decoded = decode_word(text, textlen, enc);
722 if (!decoded)
723 {
724 buf_dealloc(&buf);
725 return;
726 }
727 if (prev.data && ((prev_charsetlen != charsetlen) ||
728 !mutt_strn_equal(prev_charset, charset, charsetlen)))
729 {
730 /* Different charset, convert the previous chunk and add it to the
731 * final result */
732 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
733 }
734
735 buf_addstr(&prev, decoded);
736 FREE(&decoded);
737 prev_charset = charset;
738 prev_charsetlen = charsetlen;
739 s = text + textlen + 2; /* Skip final ?= */
740 }
741 }
742
743 /* Save the last chunk */
744 if (prev.data)
745 {
746 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
747 }
748
749 buf_addch(&buf, '\0');
750 FREE(pd);
751 *pd = buf.data;
752}
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:108
void buf_dealloc(struct Buffer *buf)
Release the memory allocated by a buffer.
Definition: buffer.c:389
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:303
const struct Slist * cc_assumed_charset(void)
Get the cached value of $assumed_charset.
Definition: config_cache.c:100
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:47
@ ENC_OTHER
Encoding unknown.
Definition: mime.h:48
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:328
bool slist_is_empty(const struct Slist *list)
Is the slist empty?
Definition: slist.c:178
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:700
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:497
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:147
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:364
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:340
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al   AddressList
tag  Header tag (used for wrapping calculation)
Note
rfc2047_encode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 762 of file rfc2047.c.

763{
764 if (!al)
765 return;
766
767 int col = tag ? strlen(tag) + 2 : 32;
768 struct Address *a = NULL;
769 char *data = NULL;
770 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
771 TAILQ_FOREACH(a, al, entries)
772 {
773 if (a->personal)
774 {
775 data = buf_strdup(a->personal);
776 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
777 buf_strcpy(a->personal, data);
778 FREE(&data);
779 }
780 else if (a->group && a->mailbox)
781 {
782 data = buf_strdup(a->mailbox);
783 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
784 buf_strcpy(a->mailbox, data);
785 FREE(&data);
786 }
787 }
788}
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:43
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition: buffer.c:407
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition: buffer.c:542
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:243
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:626
An email address.
Definition: address.h:36
struct Buffer * personal
Real name of address.
Definition: address.h:37
bool group
Group mailbox?
Definition: address.h:39
struct Buffer * mailbox
Mailbox and host address.
Definition: address.h:38
Container for Accounts, Notifications.
Definition: neomutt.h:41
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:45
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al  AddressList
Note
rfc2047_decode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 797 of file rfc2047.c.

798{
799 if (!al)
800 return;
801
802 const bool assumed = !slist_is_empty(cc_assumed_charset());
803 struct Address *a = NULL;
804 char *data = NULL;
805 TAILQ_FOREACH(a, al, entries)
806 {
807 if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed))
808 {
809 data = buf_strdup(a->personal);
810 rfc2047_decode(&data);
811 buf_strcpy(a->personal, data);
812 FREE(&data);
813 }
814 else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?"))
815 {
816 data = buf_strdup(a->mailbox);
817 rfc2047_decode(&data);
818 buf_strcpy(a->mailbox, data);
819 FREE(&data);
820 }
821 }
822}
const char * buf_find_string(const struct Buffer *buf, const char *s)
Return a pointer to a substring found in the buffer.
Definition: buffer.c:608
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:659
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 828 of file rfc2047.c.

829{
830 if (!env)
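831 return;
832 rfc2047_decode_addrlist(&env->from);
833 rfc2047_decode_addrlist(&env->to);
834 rfc2047_decode_addrlist(&env->cc);
835 rfc2047_decode_addrlist(&env->bcc);
836 rfc2047_decode_addrlist(&env->reply_to);
837 rfc2047_decode_addrlist(&env->mail_followup_to);
838 rfc2047_decode_addrlist(&env->return_path);
839 rfc2047_decode_addrlist(&env->sender);
840 rfc2047_decode(&env->x_label);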
841 rfc2047_decode(&env->subject);
842}
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:797
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:58
struct AddressList to
Email's 'To' list.
Definition: envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:61
struct AddressList sender
Email's sender.
Definition: envelope.h:63
char * subject
Email's subject.
Definition: envelope.h:70
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:62
char * x_label
X-Label.
Definition: envelope.h:76
struct AddressList from
Email's 'From' list.
Definition: envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 848 of file rfc2047.c.

849{
850 if (!env)
851 return;
852 rfc2047_encode_addrlist(&env->from, "From");
853 rfc2047_encode_addrlist(&env->to, "To");
854 rfc2047_encode_addrlist(&env->cc, "Cc");
855 rfc2047_encode_addrlist(&env->bcc, "Bcc");
856 rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
857 rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
858 rfc2047_encode_addrlist(&env->sender, "Sender");
859 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
860 rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
861 rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), c_send_charset);
862}
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:762
+ Here is the call graph for this function:
+ Here is the caller graph for this function: