NeoMutt  2024-03-23-142-g2b2e76
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) & 0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -.
 
static size_t q_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -.
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements.
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text.
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder.
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted.
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering.
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string.
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string.
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string.
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields.
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list.
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list.
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope.
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope.
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Federico Kircheis
  • Pietro Cerutti
  • Richard Russon
  • Anna Figueiredo Gomes
  • наб

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 48 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 49 of file rfc2047.c.

◆ HSPACE

#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 51 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE(ch)   (((ch) & 0xc0) == 0x80)

Definition at line 53 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)

Definition at line 66 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char * parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str: String to parse
[out] enc: Content encoding found in the first RFC2047 word
[out] charset: Charset found in the first RFC2047 word
[out] charsetlen: Length of the charset string found
[out] text: Start of the first RFC2047 encoded text
[out] textlen: Length of the encoded text found
Return values
ptr: Start of the RFC2047 encoded word
NULL: None was found

Definition at line 149 of file rfc2047.c.

151{
152 regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
153 if (!match)
154 return NULL;
155
156 const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
157 const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
158 const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
159 const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
160
161 /* Charset */
162 *charset = str + mutt_regmatch_start(mcharset);
163 *charsetlen = mutt_regmatch_len(mcharset);
164
165 /* Encoding: either Q or B */
166 *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
167
168 *text = str + mutt_regmatch_start(mtext);
169 *textlen = mutt_regmatch_len(mtext);
170 return str + mutt_regmatch_start(mfull);
171}
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:295
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:98
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:99
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:96
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:36
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:76
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:56
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d: String to convert
dlen: Length of string
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function (set on success)
wlen: Length of the encoded word (set on success)
Return values
0: Success, string converted
>0: Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 192 of file rfc2047.c.

194{
195 char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
196 const char *ib = NULL;
197 char *ob = NULL;
198 size_t ibl, obl;
199 int count, len, len_b, len_q;
200
201 if (fromcode)
202 {
203 iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
204 assert(iconv_t_valid(cd));
205 ib = d;
206 ibl = dlen;
207 ob = buf;
208 obl = sizeof(buf) - strlen(tocode);
209 if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) ||
210 (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ))
211 {
212 assert(errno == E2BIG);
213 assert(ib > d);
214 return ((ib - d) == dlen) ? dlen : ib - d + 1;
215 }
216 }
217 else
218 {
219 if (dlen > (sizeof(buf) - strlen(tocode)))
220 return sizeof(buf) - strlen(tocode) + 1;
221 memcpy(buf, d, dlen);
222 ob = buf + dlen;
223 }
224
225 count = 0;
226 for (char *p = buf; p < ob; p++)
227 {
228 unsigned char c = *p;
229 assert(strchr(MimeSpecials, '?'));
230 if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
231 ((c != ' ') && strchr(MimeSpecials, *p)))
232 {
233 count++;
234 }
235 }
236
237 len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
238 len_b = len + (((ob - buf) + 2) / 3) * 4;
239 len_q = len + (ob - buf) + 2 * count;
240
241 /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
242 if (mutt_istr_equal(tocode, "ISO-2022-JP"))
243 len_q = ENCWORD_LEN_MAX + 1;
244
245 if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
246 {
247 *encoder = b_encoder;
248 *wlen = len_b;
249 return 0;
250 }
251 else if (len_q <= ENCWORD_LEN_MAX)
252 {
253 *encoder = q_encoder;
254 *wlen = len_q;
255 return 0;
256 }
257 else
258 {
259 return dlen;
260 }
261}
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition: rfc2047.c:71
static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition: rfc2047.c:104
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:593
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:72
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:104
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition: charset.h:113
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:721
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:49
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:48
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str: Buffer that receives the encoded word
buf: Data to encode
buflen: Length of the data in buf
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function
Return values
num: Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 275 of file rfc2047.c.

277{
278 if (!fromcode)
279 {
280 return (*encoder)(str, buf, buflen, tocode);
281 }
282
283 const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
284 assert(iconv_t_valid(cd));
285 const char *ib = buf;
286 size_t ibl = buflen;
287 char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
288 char *ob = tmp;
289 size_t obl = sizeof(tmp) - strlen(tocode);
290 const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
291 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
292 assert((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ));
293 return (*encoder)(str, tmp, ob - tmp, tocode);
294}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d: String to convert
dlen: Length of string
col: Starting column to convert
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function (set by this function)
wlen: Length of the encoded word (set by this function)
Return values
num: Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 312 of file rfc2047.c.

314{
315 const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
316
317 size_t n = dlen;
318 while (true)
319 {
320 assert(n > 0);
321 const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
322 if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
323 break;
324 n = ((nn != 0) ? nn : n) - 1;
325 assert(n > 0);
326 if (utf8)
327 while ((n > 1) && CONTINUATION_BYTE(d[n]))
328 n--;
329 }
330 return n;
331}
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:53
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:192
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res: Buffer where the resulting string is appended
[in] buf: Buffer with the input string
[in] charset: Charset to use for the conversion
[in] charsetlen: Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 342 of file rfc2047.c.

343{
344 if (!charset)
345 return;
346 char end = charset[charsetlen];
347 charset[charsetlen] = '\0';
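348 mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM);
349 charset[charsetlen] = end;
350 mutt_mb_filter_unprintable(&buf->data);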
351 buf_addstr(res, buf->data);
352 FREE(&buf->data);
353 buf_init(buf);
354}
struct Buffer * buf_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:60
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:116
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
#define FREE(x)
Definition: memory.h:45
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:830
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:73
char * data
Pointer to data.
Definition: buffer.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char * decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s: String to decode
len: Length of the string
enc: Encoding type
Return values
ptr: Decoded string
NULL: Base64 decoding failed
Note
The input string must be null-terminated; the len parameter is an optimization. The caller must free the returned string.

Definition at line 366 of file rfc2047.c.

367{
368 const char *it = s;
369 const char *end = s + len;
370
371 assert(*end == '\0');
372
373 if (enc == ENC_QUOTED_PRINTABLE)
374 {
375 struct Buffer *buf = buf_pool_get();
376 for (; it < end; it++)
377 {
378 if (*it == '_')
379 {
380 buf_addch(buf, ' ');
381 }
382 else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
383 (!(it[2] & ~127) && (hexval(it[2]) != -1)))
384 {
385 buf_addch(buf, (hexval(it[1]) << 4) | hexval(it[2]));
386 it += 2;
387 }
388 else
389 {
390 buf_addch(buf, *it);
391 }
392 }
393 char *str = buf_strdup(buf);
394 buf_pool_release(&buf);
395 return str;
396 }
397 else if (enc == ENC_BASE64)
398 {
399 const int olen = 3 * len / 4 + 1;
400 char *out = mutt_mem_malloc(olen);
401 int dlen = mutt_b64_decode(it, out, olen);
402 if (dlen == -1)
403 {
404 FREE(&out);
405 return NULL;
406 }
407 out[dlen] = '\0';
408 return out;
409 }
410
411 assert(0); /* The enc parameter has an invalid value */
412 return NULL;
413}
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:135
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition: buffer.c:570
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
#define hexval(ch)
Definition: mime.h:80
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:81
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition: pool.c:94
String manipulation buffer.
Definition: buffer.h:36
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const struct Slist *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] charsets: List of allowable encodings (colon separated)
[out] e: Encoded string
[out] elen: Length of encoded string
[in] specials: Special characters to be encoded
Return values
0: Success
1: The input could not be converted to UTF-8
2: mutt_ch_choose() returned no target charset

Definition at line 427 of file rfc2047.c.

429{
430 int rc = 0;
431 char *buf = NULL;
432 size_t bufpos, buflen;
433 char *t0 = NULL, *t1 = NULL, *t = NULL;
434 char *s0 = NULL, *s1 = NULL;
435 size_t ulen, r, wlen = 0;
436 encoder_t encoder = NULL;
437 char *tocode1 = NULL;
438 const char *tocode = NULL;
439 const char *icode = "utf-8";
440
441 /* Try to convert to UTF-8. */
442 char *u = mutt_strn_dup(d, dlen);
443 if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
444 {
445 rc = 1;
446 icode = 0;
447 }
448 ulen = mutt_str_len(u);
449
450 /* Find earliest and latest things we must encode. */
451 s0 = 0;
452 s1 = 0;
453 t0 = 0;
454 t1 = 0;
455 for (t = u; t < (u + ulen); t++)
456 {
457 if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
458 {
459 if (!t0)
460 t0 = t;
461 t1 = t;
462 }
463 else if (specials && *t && strchr(specials, *t))
464 {
465 if (!s0)
466 s0 = t;
467 s1 = t;
468 }
469 }
470
471 /* If we have something to encode, include RFC822 specials */
472 if (t0 && s0 && (s0 < t0))
473 t0 = s0;
474 if (t1 && s1 && (s1 > t1))
475 t1 = s1;
476
477 if (!t0)
478 {
479 /* No encoding is required. */
480 *e = u;
481 *elen = ulen;
482 return rc;
483 }
484
485 /* Choose target charset. */
486 tocode = fromcode;
487 if (icode)
488 {
489 tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
490 if (tocode1)
491 {
492 tocode = tocode1;
493 }
494 else
495 {
496 rc = 2;
497 icode = 0;
498 }
499 }
500
501 /* Hack to avoid labelling 8-bit data as us-ascii. */
502 if (!icode && mutt_ch_is_us_ascii(tocode))
503 tocode = "unknown-8bit";
504
505 /* Adjust t0 for maximum length of line. */
506 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
507 if (t < u)
508 t = u;
509 if (t < t0)
510 t0 = t;
511
512 /* Adjust t0 until we can encode a character after a space. */
513 for (; t0 > u; t0--)
514 {
515 if (!HSPACE(*(t0 - 1)))
516 continue;
517 t = t0 + 1;
518 if (icode)
519 while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
520 t++;
521 if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
522 ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
523 {
524 break;
525 }
526 }
527
528 /* Adjust t1 until we can encode a character before a space. */
529 for (; t1 < (u + ulen); t1++)
530 {
531 if (!HSPACE(*t1))
532 continue;
533 t = t1 - 1;
534 if (icode)
535 while (CONTINUATION_BYTE(*t))
536 t--;
537 if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
538 ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
539 {
540 break;
541 }
542 }
543
544 /* We shall encode the region [t0,t1). */
545
546 /* Initialise the output buffer with the us-ascii prefix. */
547 buflen = 2 * ulen;
548 buf = mutt_mem_malloc(buflen);
549 bufpos = t0 - u;
550 memcpy(buf, u, t0 - u);
551
552 col += t0 - u;
553
554 t = t0;
555 while (true)
556 {
557 /* Find how much we can encode. */
558 size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
559 if (n == (t1 - t))
560 {
561 /* See if we can fit the us-ascii suffix, too. */
562 if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
563 break;
564 n = t1 - t - 1;
565 if (icode)
566 while (CONTINUATION_BYTE(t[n]))
567 n--;
568 if (n == 0)
569 {
570 /* This should only happen in the really stupid case where the
571 * only word that needs encoding is one character long, but
572 * there is too much us-ascii stuff after it to use a single
573 * encoded word. We add the next word to the encoded region
574 * and try again. */
575 assert(t1 < (u + ulen));
576 for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
577 ; // do nothing
578
579 continue;
580 }
581 n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
582 }
583
584 /* Add to output buffer. */
585 const char *line_break = "\n\t";
586 const int lb_len = 2; /* strlen(line_break) */
587
588 if ((bufpos + wlen + lb_len) > buflen)
589 {
590 buflen = bufpos + wlen + lb_len;
591 mutt_mem_realloc(&buf, buflen);
592 }
593 r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
594 assert(r == wlen);
595 bufpos += wlen;
596 memcpy(buf + bufpos, line_break, lb_len);
597 bufpos += lb_len;
598
599 col = 1;
600
601 t += n;
602 }
603
604 /* Add last encoded word and us-ascii suffix to buffer. */
605 buflen = bufpos + wlen + (u + ulen - t1);
606 mutt_mem_realloc(&buf, buflen + 1);
607 r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
608 assert(r == wlen);
609 bufpos += wlen;
610 memcpy(buf + bufpos, t1, u + ulen - t1);
611
612 FREE(&tocode1);
613 FREE(&u);
614
615 buf[buflen] = '\0';
616
617 *e = buf;
618 *elen = buflen + 1;
619 return rc;
620}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1110
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:98
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:429
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:545
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:312
size_t(* encoder_t)(char *res, const char *buf, size_t buflen, const char *tocode)
Definition: rfc2047.c:66
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:275
#define HSPACE(ch)
Definition: rfc2047.c:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const struct Slist *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out] pd: String to be encoded, and resulting encoded string
[in] specials: Special characters to be encoded
[in] col: Starting index in string
[in] charsets: List of charsets to choose from

Definition at line 629 of file rfc2047.c.

630{
631 if (!pd || !*pd)
632 return;
633
634 const char *const c_charset = cc_charset();
635 if (!c_charset)
636 return;
637
638 struct Slist *fallback = NULL;
639 if (!charsets)
640 {
641 fallback = slist_parse("utf-8", D_SLIST_SEP_COLON);
642 charsets = fallback;
643 }
644
645 char *e = NULL;
646 size_t elen = 0;
647 encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
648
649 slist_free(&fallback);
650 FREE(pd);
651 *pd = e;
652}
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:179
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:126
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:427
String list.
Definition: slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition: types.h:112
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd: String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 662 of file rfc2047.c.

663{
664 if (!pd || !*pd)
665 return;
666
667 struct Buffer *buf = buf_pool_get(); // Output buffer
668 char *s = *pd; // Read pointer
669 char *beg = NULL; // Begin of encoded word
670 enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE
671 char *charset = NULL; // Which charset
672 size_t charsetlen; // Length of the charset
673 char *text = NULL; // Encoded text
674 size_t textlen = 0; // Length of encoded text
675
676 /* Keep some state in case the next decoded word is using the same charset
677 * and it happens to be split in the middle of a multibyte character.
678 * See https://github.com/neomutt/neomutt/issues/1015 */
679 struct Buffer *prev = buf_pool_get(); /* Previously decoded word */
680 char *prev_charset = NULL; /* Previously used charset */
681 size_t prev_charsetlen = 0; /* Length of the previously used charset */
682
683 const struct Slist *c_assumed_charset = cc_assumed_charset();
684 const char *c_charset = cc_charset();
685 while (*s)
686 {
687 beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
688 if (beg != s)
689 {
690 /* Some non-encoded text was found */
691 size_t holelen = beg ? beg - s : mutt_str_len(s);
692
693 /* Ignore whitespace between encoded words */
694 if (beg && (mutt_str_lws_len(s, holelen) == holelen))
695 {
696 s = beg;
697 continue;
698 }
699
700 /* If we have some previously decoded text, add it now */
701 if (!buf_is_empty(prev))
702 {
703 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
704 }
705
706 /* Add non-encoded part */
707 if (slist_is_empty(c_assumed_charset))
708 {
709 buf_addstr_n(buf, s, holelen);
710 }
711 else
712 {
713 char *conv = mutt_strn_dup(s, holelen);
714 mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
715 buf_addstr(buf, conv);
716 FREE(&conv);
717 }
718 s += holelen;
719 }
720 if (beg)
721 {
722 /* Some encoded text was found */
723 text[textlen] = '\0';
724 char *decoded = decode_word(text, textlen, enc);
725 if (!decoded)
726 {
727 goto done;
728 }
729 if (!buf_is_empty(prev) && ((prev_charsetlen != charsetlen) ||
730 !mutt_strn_equal(prev_charset, charset, charsetlen)))
731 {
732 /* Different charset, convert the previous chunk and add it to the
733 * final result */
734 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
735 }
736
737 buf_addstr(prev, decoded);
738 FREE(&decoded);
739 prev_charset = charset;
740 prev_charsetlen = charsetlen;
741 s = text + textlen + 2; /* Skip final ?= */
742 }
743 }
744
745 /* Save the last chunk */
746 if (!buf_is_empty(prev))
747 {
748 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
749 }
750
751 FREE(pd);
752 *pd = buf_strdup(buf);
753
754done:
755 buf_pool_release(&buf);
756 buf_pool_release(&prev);
757}
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:95
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:290
const struct Slist * cc_assumed_charset(void)
Get the cached value of $assumed_charset.
Definition: config_cache.c:101
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:47
@ ENC_OTHER
Encoding unknown.
Definition: mime.h:48
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:331
bool slist_is_empty(const struct Slist *list)
Is the slist empty?
Definition: slist.c:142
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:677
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:474
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:149
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:366
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:342
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al: AddressList
tag: Header tag (used for wrapping calculation)
Note
rfc2047_encode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 767 of file rfc2047.c.

768{
769 if (!al)
770 return;
771
772 int col = tag ? strlen(tag) + 2 : 32;
773 struct Address *a = NULL;
774 char *data = NULL;
775 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
776 TAILQ_FOREACH(a, al, entries)
777 {
778 if (a->personal)
779 {
780 data = buf_strdup(a->personal);
781 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
782 buf_strcpy(a->personal, data);
783 FREE(&data);
784 }
785 else if (a->group && a->mailbox)
786 {
787 data = buf_strdup(a->mailbox);
788 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
789 buf_strcpy(a->mailbox, data);
790 FREE(&data);
791 }
792 }
793}
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:45
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition: buffer.c:394
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:243
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:629
An email address.
Definition: address.h:36
struct Buffer * personal
Real name of address.
Definition: address.h:37
bool group
Group mailbox?
Definition: address.h:39
struct Buffer * mailbox
Mailbox and host address.
Definition: address.h:38
Container for Accounts, Notifications.
Definition: neomutt.h:41
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:45
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al: AddressList
Note
rfc2047_decode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 802 of file rfc2047.c.

803{
804 if (!al)
805 return;
806
807 const bool assumed = !slist_is_empty(cc_assumed_charset());
808 struct Address *a = NULL;
809 char *data = NULL;
810 TAILQ_FOREACH(a, al, entries)
811 {
812 if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed))
813 {
814 data = buf_strdup(a->personal);
815 rfc2047_decode(&data);
816 buf_strcpy(a->personal, data);
817 FREE(&data);
818 }
819 else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?"))
820 {
821 data = buf_strdup(a->mailbox);
822 rfc2047_decode(&data);
823 buf_strcpy(a->mailbox, data);
824 FREE(&data);
825 }
826 }
827}
const char * buf_find_string(const struct Buffer *buf, const char *s)
Return a pointer to a substring found in the buffer.
Definition: buffer.c:639
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:662
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 833 of file rfc2047.c.

834{
835 if (!env)
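836 return;
837 rfc2047_decode_addrlist(&env->from);
838 rfc2047_decode_addrlist(&env->to);
839 rfc2047_decode_addrlist(&env->cc);
840 rfc2047_decode_addrlist(&env->bcc);
841 rfc2047_decode_addrlist(&env->reply_to);
842 rfc2047_decode_addrlist(&env->mail_followup_to);
843 rfc2047_decode_addrlist(&env->return_path);
844 rfc2047_decode_addrlist(&env->sender);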
845 rfc2047_decode(&env->x_label);
846
847 char *subj = env->subject;
848 *(char **) &env->subject = NULL;
849 rfc2047_decode(&subj);
850 mutt_env_set_subject(env, subj);
851 FREE(&subj);
852}
void mutt_env_set_subject(struct Envelope *env, const char *subj)
Set both subject and real_subj to subj.
Definition: envelope.c:69
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:802
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:58
char *const subject
Email's subject.
Definition: envelope.h:70
struct AddressList to
Email's 'To' list.
Definition: envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:61
struct AddressList sender
Email's sender.
Definition: envelope.h:63
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:62
char * x_label
X-Label.
Definition: envelope.h:76
struct AddressList from
Email's 'From' list.
Definition: envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 858 of file rfc2047.c.

859{
860 if (!env)
861 return;
862 rfc2047_encode_addrlist(&env->from, "From");
863 rfc2047_encode_addrlist(&env->to, "To");
864 rfc2047_encode_addrlist(&env->cc, "Cc");
865 rfc2047_encode_addrlist(&env->bcc, "Bcc");
866 rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
867 rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
868 rfc2047_encode_addrlist(&env->sender, "Sender");
869 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
870 rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
871
872 char *subj = env->subject;
873 *(char **) &env->subject = NULL;
874 rfc2047_encode(&subj, NULL, sizeof("Subject:"), c_send_charset);
875 mutt_env_set_subject(env, subj);
876 FREE(&subj);
877}
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:767
+ Here is the call graph for this function:
+ Here is the caller graph for this function: