NeoMutt  2023-03-22-27-g3cb248
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) &0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

Definition at line 64 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char * parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in]   str          String to parse
[out]  enc          Content encoding found in the first RFC2047 word
[out]  charset      Charset found in the first RFC2047 word
[out]  charsetlen   Length of the charset string found
[out]  text         Start of the first RFC2047 encoded text
[out]  textlen      Length of the encoded text found
Return values
ptr    Start of the RFC2047 encoded word
NULL   None was found

Definition at line 145 of file rfc2047.c.

147{
148 regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
149 if (!match)
150 return NULL;
151
152 const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
153 const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
154 const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
155 const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
156
157 /* Charset */
158 *charset = str + mutt_regmatch_start(mcharset);
159 *charsetlen = mutt_regmatch_len(mcharset);
160
161 /* Encoding: either Q or B */
162 *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
163
164 *text = str + mutt_regmatch_start(mtext);
165 *textlen = mutt_regmatch_len(mtext);
166 return str + mutt_regmatch_start(mfull);
167}
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:308
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:98
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:96
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:95
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d          String to convert
dlen       Length of string
fromcode   Original encoding
tocode     New encoding
encoder    Encoding function (set on success)
wlen       Length of the encoded word (set on success)
Return values
0    Success, string converted
>0   Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 188 of file rfc2047.c.

190{
191 char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
192 const char *ib = NULL;
193 char *ob = NULL;
194 size_t ibl, obl;
195 int count, len, len_b, len_q;
196
197 if (fromcode)
198 {
199 iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
200 assert(cd != (iconv_t) (-1));
201 ib = d;
202 ibl = dlen;
203 ob = buf;
204 obl = sizeof(buf) - strlen(tocode);
205 if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t) (-1)) ||
206 (iconv(cd, NULL, NULL, &ob, &obl) == (size_t) (-1)))
207 {
208 assert(errno == E2BIG);
209 iconv_close(cd);
210 assert(ib > d);
211 return ((ib - d) == dlen) ? dlen : ib - d + 1;
212 }
213 iconv_close(cd);
214 }
215 else
216 {
217 if (dlen > (sizeof(buf) - strlen(tocode)))
218 return sizeof(buf) - strlen(tocode) + 1;
219 memcpy(buf, d, dlen);
220 ob = buf + dlen;
221 }
222
223 count = 0;
224 for (char *p = buf; p < ob; p++)
225 {
226 unsigned char c = *p;
227 assert(strchr(MimeSpecials, '?'));
228 if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
229 ((c != ' ') && strchr(MimeSpecials, *p)))
230 {
231 count++;
232 }
233 }
234
235 len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
236 len_b = len + (((ob - buf) + 2) / 3) * 4;
237 len_q = len + (ob - buf) + 2 * count;
238
239 /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
240 if (mutt_istr_equal(tocode, "ISO-2022-JP"))
241 len_q = ENCWORD_LEN_MAX + 1;
242
243 if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
244 {
245 *encoder = b_encoder;
246 *wlen = len_b;
247 return 0;
248 }
249 else if (len_q <= ENCWORD_LEN_MAX)
250 {
251 *encoder = q_encoder;
252 *wlen = len_q;
253 return 0;
254 }
255 else
256 {
257 return dlen;
258 }
259}
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition: rfc2047.c:69
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition: rfc2047.c:102
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:563
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:819
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str        Buffer for the encoded word (output)
buf        Data to encode
buflen     Length of the data in buf
fromcode   Original encoding
tocode     New encoding
encoder    Encoding function
Return values
num   Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 273 of file rfc2047.c.

275{
276 if (!fromcode)
277 {
278 return (*encoder)(str, buf, buflen, tocode);
279 }
280
281 const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
282 assert(cd != (iconv_t) (-1));
283 const char *ib = buf;
284 size_t ibl = buflen;
285 char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
286 char *ob = tmp;
287 size_t obl = sizeof(tmp) - strlen(tocode);
288 const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
289 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
290 assert(n1 != (size_t) (-1) && n2 != (size_t) (-1));
291 iconv_close(cd);
292 return (*encoder)(str, tmp, ob - tmp, tocode);
293}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d          String to convert
dlen       Length of string
col        Starting column to convert
fromcode   Original encoding
tocode     New encoding
encoder    Encoding function (set by this function)
wlen       Length of the encoded word (set by this function)
Return values
num   Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 311 of file rfc2047.c.

313{
314 const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
315
316 size_t n = dlen;
317 while (true)
318 {
319 assert(n > 0);
320 const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
321 if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
322 break;
323 n = ((nn != 0) ? nn : n) - 1;
324 assert(n > 0);
325 if (utf8)
326 while ((n > 1) && CONTINUATION_BYTE(d[n]))
327 n--;
328 }
329 return n;
330}
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:188
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer res,
struct Buffer buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out]  res          Buffer where the resulting string is appended
[in]   buf          Buffer with the input string
[in]   charset      Charset to use for the conversion
[in]   charsetlen   Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 341 of file rfc2047.c.

342{
343 if (!charset)
344 return;
345 char end = charset[charsetlen];
346 charset[charsetlen] = '\0';
347 const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
348 mutt_ch_convert_string(&buf->data, charset, c_charset, MUTT_ICONV_HOOK_FROM);
349 charset[charsetlen] = end;
350 mutt_mb_filter_unprintable(&buf->data);
351 mutt_buffer_addstr(res, buf->data);
352 FREE(&buf->data);
353 mutt_buffer_init(buf);
354}
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:52
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:233
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
#define FREE(x)
Definition: memory.h:43
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:751
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
char * data
Pointer to data.
Definition: buffer.h:35
Container for Accounts, Notifications.
Definition: neomutt.h:37
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char * decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s     String to decode
len   Length of the string
enc   Encoding type
Return values
ptr   Decoded string
Note
The caller must free the returned string

Definition at line 365 of file rfc2047.c.

366{
367 const char *it = s;
368 const char *end = s + len;
369
370 if (enc == ENC_QUOTED_PRINTABLE)
371 {
372 struct Buffer buf = mutt_buffer_make(0);
373 for (; it < end; it++)
374 {
375 if (*it == '_')
376 {
377 mutt_buffer_addch(&buf, ' ');
378 }
379 else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
380 (!(it[2] & ~127) && (hexval(it[2]) != -1)))
381 {
382 mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
383 it += 2;
384 }
385 else
386 {
387 mutt_buffer_addch(&buf, *it);
388 }
389 }
390 mutt_buffer_addch(&buf, '\0');
391 return buf.data;
392 }
393 else if (enc == ENC_BASE64)
394 {
395 const int olen = 3 * len / 4 + 1;
396 char *out = mutt_mem_malloc(olen);
397 int dlen = mutt_b64_decode(it, out, olen);
398 if (dlen == -1)
399 {
400 FREE(&out);
401 return NULL;
402 }
403 out[dlen] = '\0';
404 return out;
405 }
406
407 assert(0); /* The enc parameter has an invalid value */
408 return NULL;
409}
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:67
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:248
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
#define hexval(ch)
Definition: mime.h:80
String manipulation buffer.
Definition: buffer.h:34
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const struct Slist *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in]   d          String to convert
[in]   dlen       Length of string
[in]   col        Starting column to convert
[in]   fromcode   Original encoding
[in]   charsets   List of allowable encodings (colon separated)
[out]  e          Encoded string
[out]  elen       Length of encoded string
[in]   specials   Special characters to be encoded
Return values
0   Success
1   The string could not be converted from fromcode to UTF-8
2   No suitable charset could be chosen from charsets

Definition at line 423 of file rfc2047.c.

425{
426 int rc = 0;
427 char *buf = NULL;
428 size_t bufpos, buflen;
429 char *t0 = NULL, *t1 = NULL, *t = NULL;
430 char *s0 = NULL, *s1 = NULL;
431 size_t ulen, r, wlen = 0;
432 encoder_t encoder = NULL;
433 char *tocode1 = NULL;
434 const char *tocode = NULL;
435 const char *icode = "utf-8";
436
437 /* Try to convert to UTF-8. */
438 char *u = mutt_strn_dup(d, dlen);
439 if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
440 {
441 rc = 1;
442 icode = 0;
443 }
444 ulen = mutt_str_len(u);
445
446 /* Find earliest and latest things we must encode. */
447 s0 = 0;
448 s1 = 0;
449 t0 = 0;
450 t1 = 0;
451 for (t = u; t < (u + ulen); t++)
452 {
453 if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
454 {
455 if (!t0)
456 t0 = t;
457 t1 = t;
458 }
459 else if (specials && *t && strchr(specials, *t))
460 {
461 if (!s0)
462 s0 = t;
463 s1 = t;
464 }
465 }
466
467 /* If we have something to encode, include RFC822 specials */
468 if (t0 && s0 && (s0 < t0))
469 t0 = s0;
470 if (t1 && s1 && (s1 > t1))
471 t1 = s1;
472
473 if (!t0)
474 {
475 /* No encoding is required. */
476 *e = u;
477 *elen = ulen;
478 return rc;
479 }
480
481 /* Choose target charset. */
482 tocode = fromcode;
483 if (icode)
484 {
485 tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
486 if (tocode1)
487 {
488 tocode = tocode1;
489 }
490 else
491 {
492 rc = 2;
493 icode = 0;
494 }
495 }
496
497 /* Hack to avoid labelling 8-bit data as us-ascii. */
498 if (!icode && mutt_ch_is_us_ascii(tocode))
499 tocode = "unknown-8bit";
500
501 /* Adjust t0 for maximum length of line. */
502 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
503 if (t < u)
504 t = u;
505 if (t < t0)
506 t0 = t;
507
508 /* Adjust t0 until we can encode a character after a space. */
509 for (; t0 > u; t0--)
510 {
511 if (!HSPACE(*(t0 - 1)))
512 continue;
513 t = t0 + 1;
514 if (icode)
515 while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
516 t++;
517 if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
518 ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
519 {
520 break;
521 }
522 }
523
524 /* Adjust t1 until we can encode a character before a space. */
525 for (; t1 < (u + ulen); t1++)
526 {
527 if (!HSPACE(*t1))
528 continue;
529 t = t1 - 1;
530 if (icode)
531 while (CONTINUATION_BYTE(*t))
532 t--;
533 if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
534 ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
535 {
536 break;
537 }
538 }
539
540 /* We shall encode the region [t0,t1). */
541
542 /* Initialise the output buffer with the us-ascii prefix. */
543 buflen = 2 * ulen;
544 buf = mutt_mem_malloc(buflen);
545 bufpos = t0 - u;
546 memcpy(buf, u, t0 - u);
547
548 col += t0 - u;
549
550 t = t0;
551 while (true)
552 {
553 /* Find how much we can encode. */
554 size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
555 if (n == (t1 - t))
556 {
557 /* See if we can fit the us-ascii suffix, too. */
558 if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
559 break;
560 n = t1 - t - 1;
561 if (icode)
562 while (CONTINUATION_BYTE(t[n]))
563 n--;
564 if (n == 0)
565 {
566 /* This should only happen in the really stupid case where the
567 * only word that needs encoding is one character long, but
568 * there is too much us-ascii stuff after it to use a single
569 * encoded word. We add the next word to the encoded region
570 * and try again. */
571 assert(t1 < (u + ulen));
572 for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
573 ; // do nothing
574
575 continue;
576 }
577 n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
578 }
579
580 /* Add to output buffer. */
581 const char *line_break = "\n\t";
582 const int lb_len = 2; /* strlen(line_break) */
583
584 if ((bufpos + wlen + lb_len) > buflen)
585 {
586 buflen = bufpos + wlen + lb_len;
587 mutt_mem_realloc(&buf, buflen);
588 }
589 r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
590 assert(r == wlen);
591 bufpos += wlen;
592 memcpy(buf + bufpos, line_break, lb_len);
593 bufpos += lb_len;
594
595 col = 1;
596
597 t += n;
598 }
599
600 /* Add last encoded word and us-ascii suffix to buffer. */
601 buflen = bufpos + wlen + (u + ulen - t1);
602 mutt_mem_realloc(&buf, buflen + 1);
603 r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
604 assert(r == wlen);
605 bufpos += wlen;
606 memcpy(buf + bufpos, t1, u + ulen - t1);
607
608 FREE(&tocode1);
609 FREE(&u);
610
611 buf[buflen] = '\0';
612
613 *e = buf;
614 *elen = buflen + 1;
615 return rc;
616}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1036
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:96
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:451
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:567
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Definition: rfc2047.c:64
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:311
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:273
#define HSPACE(ch)
Definition: rfc2047.c:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const struct Slist *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out]  pd         String to be encoded, and resulting encoded string
[in]      specials   Special characters to be encoded
[in]      col        Starting column of the string (used for the line-length calculation)
[in]      charsets   List of charsets to choose from

Definition at line 625 of file rfc2047.c.

626{
627 const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
628 if (!c_charset || !pd || !*pd)
629 return;
630
631 struct Slist *fallback = NULL;
632 if (!charsets)
633 {
634 fallback = slist_parse("utf-8", SLIST_SEP_COLON);
635 charsets = fallback;
636 }
637
638 char *e = NULL;
639 size_t elen = 0;
640 encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
641
642 slist_free(&fallback);
643 FREE(pd);
644 *pd = e;
645}
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:200
void slist_free(struct Slist **list)
Free an Slist object.
Definition: slist.c:162
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:423
#define SLIST_SEP_COLON
Definition: slist.h:35
String list.
Definition: slist.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out]  pd   String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 655 of file rfc2047.c.

656{
657 if (!pd || !*pd)
658 return;
659
660 struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
661 char *s = *pd; /* Read pointer */
662 char *beg = NULL; /* Begin of encoded word */
663 enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
664 char *charset = NULL; /* Which charset */
665 size_t charsetlen; /* Length of the charset */
666 char *text = NULL; /* Encoded text */
667 size_t textlen; /* Length of encoded text */
668
669 /* Keep some state in case the next decoded word is using the same charset
670 * and it happens to be split in the middle of a multibyte character.
671 * See https://github.com/neomutt/neomutt/issues/1015 */
672 struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
673 char *prev_charset = NULL; /* Previously used charset */
674 size_t prev_charsetlen = 0; /* Length of the previously used charset */
675
676 while (*s)
677 {
678 beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
679 if (beg != s)
680 {
681 /* Some non-encoded text was found */
682 size_t holelen = beg ? beg - s : mutt_str_len(s);
683
684 /* Ignore whitespace between encoded words */
685 if (beg && (mutt_str_lws_len(s, holelen) == holelen))
686 {
687 s = beg;
688 continue;
689 }
690
691 /* If we have some previously decoded text, add it now */
692 if (!mutt_buffer_is_empty(&prev))
693 {
694 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
695 }
696
697 /* Add non-encoded part */
698 {
699 const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
700 if (c_assumed_charset)
701 {
702 char *conv = mutt_strn_dup(s, holelen);
703 const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
704 mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
705 mutt_buffer_addstr(&buf, conv);
706 FREE(&conv);
707 }
708 else
709 {
710 mutt_buffer_addstr_n(&buf, s, holelen);
711 }
712 }
713 s += holelen;
714 }
715 if (beg)
716 {
717 /* Some encoded text was found */
718 text[textlen] = '\0';
719 char *decoded = decode_word(text, textlen, enc);
720 if (!decoded)
721 {
722 mutt_buffer_dealloc(&buf);
723 return;
724 }
725 if (prev.data && ((prev_charsetlen != charsetlen) ||
726 !mutt_strn_equal(prev_charset, charset, charsetlen)))
727 {
728 /* Different charset, convert the previous chunk and add it to the
729 * final result */
730 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
731 }
732
733 mutt_buffer_addstr(&prev, decoded);
734 FREE(&decoded);
735 prev_charset = charset;
736 prev_charsetlen = charsetlen;
737 s = text + textlen + 2; /* Skip final ?= */
738 }
739 }
740
741 /* Save the last chunk */
742 if (prev.data)
743 {
744 finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
745 }
746
747 mutt_buffer_addch(&buf, '\0');
748 FREE(pd);
749 *pd = buf.data;
750}
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:298
void mutt_buffer_dealloc(struct Buffer *buf)
Release the memory allocated by a buffer.
Definition: buffer.c:347
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:105
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:268
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:47
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:307
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:709
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:496
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:145
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:365
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:341
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al    AddressList
tag   Header tag (used for wrapping calculation)

Definition at line 757 of file rfc2047.c.

758{
759 if (!al)
760 return;
761
762 int col = tag ? strlen(tag) + 2 : 32;
763 struct Address *a = NULL;
764 TAILQ_FOREACH(a, al, entries)
765 {
766 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
767 if (a->personal)
768 rfc2047_encode(&a->personal, AddressSpecials, col, c_send_charset);
769 else if (a->group && a->mailbox)
770 rfc2047_encode(&a->mailbox, AddressSpecials, col, c_send_charset);
771 }
772}
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:625
An email address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:39
char * mailbox
Mailbox and host address.
Definition: address.h:38
char * personal
Real name of address.
Definition: address.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al   AddressList

Definition at line 778 of file rfc2047.c.

779{
780 if (!al)
781 return;
782
783 struct Address *a = NULL;
784 TAILQ_FOREACH(a, al, entries)
785 {
786 const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
787 if (a->personal && ((strstr(a->personal, "=?")) || c_assumed_charset))
788 {
789 rfc2047_decode(&a->personal);
790 }
791 else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
792 rfc2047_decode(&a->mailbox);
793 }
794}
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:655
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env   Envelope

Definition at line 800 of file rfc2047.c.

801{
802 if (!env)
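803 return;
804 rfc2047_decode_addrlist(&env->from);
805 rfc2047_decode_addrlist(&env->to);
806 rfc2047_decode_addrlist(&env->cc);
807 rfc2047_decode_addrlist(&env->bcc);
808 rfc2047_decode_addrlist(&env->reply_to);
809 rfc2047_decode_addrlist(&env->mail_followup_to);
810 rfc2047_decode_addrlist(&env->return_path);
811 rfc2047_decode_addrlist(&env->sender);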
812 rfc2047_decode(&env->x_label);
813 rfc2047_decode(&env->subject);
814}
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:778
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:58
struct AddressList to
Email's 'To' list.
Definition: envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:61
struct AddressList sender
Email's sender.
Definition: envelope.h:63
char * subject
Email's subject.
Definition: envelope.h:70
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:62
char * x_label
X-Label.
Definition: envelope.h:76
struct AddressList from
Email's 'From' list.
Definition: envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env   Envelope

Definition at line 820 of file rfc2047.c.

821{
822 if (!env)
823 return;
824 rfc2047_encode_addrlist(&env->from, "From");
825 rfc2047_encode_addrlist(&env->to, "To");
826 rfc2047_encode_addrlist(&env->cc, "Cc");
827 rfc2047_encode_addrlist(&env->bcc, "Bcc");
828 rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
829 rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
830 rfc2047_encode_addrlist(&env->sender, "Sender");
831 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
832 rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
833 rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), c_send_charset);
834}
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:757
+ Here is the call graph for this function:
+ Here is the caller graph for this function: