NeoMutt  2022-04-29-145-g9b6a0e
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) &0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

Definition at line 64 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in]   str         String to parse
[out]  enc         Content encoding found in the first RFC2047 word
[out]  charset     Charset found in the first RFC2047 word
[out]  charsetlen  Length of the charset string found
[out]  text        Start of the first RFC2047 encoded text
[out]  textlen     Length of the encoded text found
Return values
ptr   Start of the RFC2047 encoded word
NULL  None was found

Definition at line 143 of file rfc2047.c.

145 {
146  regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
147  if (!match)
148  return NULL;
149 
150  const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
151  const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
152  const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
153  const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
154 
155  /* Charset */
156  *charset = str + mutt_regmatch_start(mcharset);
157  *charsetlen = mutt_regmatch_len(mcharset);
158 
159  /* Encoding: either Q or B */
160  *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
161 
162  *text = str + mutt_regmatch_start(mtext);
163  *textlen = mutt_regmatch_len(mtext);
164  return str + mutt_regmatch_start(mfull);
165 }
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:98
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:96
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:95
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d         String to convert
dlen      Length of string
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function (set on success)
wlen      Length of the resulting encoded word (set on success)
Return values
0   Success, string converted
>0  Error, number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 186 of file rfc2047.c.

188 {
189  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
190  const char *ib = NULL;
191  char *ob = NULL;
192  size_t ibl, obl;
193  int count, len, len_b, len_q;
194 
195  if (fromcode)
196  {
197  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
198  assert(cd != (iconv_t) (-1));
199  ib = d;
200  ibl = dlen;
201  ob = buf;
202  obl = sizeof(buf) - strlen(tocode);
203  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t) (-1)) ||
204  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t) (-1)))
205  {
206  assert(errno == E2BIG);
207  iconv_close(cd);
208  assert(ib > d);
209  return ((ib - d) == dlen) ? dlen : ib - d + 1;
210  }
211  iconv_close(cd);
212  }
213  else
214  {
215  if (dlen > (sizeof(buf) - strlen(tocode)))
216  return sizeof(buf) - strlen(tocode) + 1;
217  memcpy(buf, d, dlen);
218  ob = buf + dlen;
219  }
220 
221  count = 0;
222  for (char *p = buf; p < ob; p++)
223  {
224  unsigned char c = *p;
225  assert(strchr(MimeSpecials, '?'));
226  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
227  ((c != ' ') && strchr(MimeSpecials, *p)))
228  {
229  count++;
230  }
231  }
232 
233  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
234  len_b = len + (((ob - buf) + 2) / 3) * 4;
235  len_q = len + (ob - buf) + 2 * count;
236 
237  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
238  if (mutt_istr_equal(tocode, "ISO-2022-JP"))
239  len_q = ENCWORD_LEN_MAX + 1;
240 
241  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
242  {
243  *encoder = b_encoder;
244  *wlen = len_b;
245  return 0;
246  }
247  else if (len_q <= ENCWORD_LEN_MAX)
248  {
249  *encoder = q_encoder;
250  *wlen = len_q;
251  return 0;
252  }
253  else
254  return dlen;
255 }
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition: rfc2047.c:69
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition: rfc2047.c:102
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:564
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:796
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str       Buffer for the encoded result
buf       Data to encode
buflen    Length of the data to encode
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function
Return values
num  Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 269 of file rfc2047.c.

271 {
272  if (!fromcode)
273  {
274  return (*encoder)(str, buf, buflen, tocode);
275  }
276 
277  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
278  assert(cd != (iconv_t) (-1));
279  const char *ib = buf;
280  size_t ibl = buflen;
281  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
282  char *ob = tmp;
283  size_t obl = sizeof(tmp) - strlen(tocode);
284  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
285  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
286  assert(n1 != (size_t) (-1) && n2 != (size_t) (-1));
287  iconv_close(cd);
288  return (*encoder)(str, tmp, ob - tmp, tocode);
289 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d         String to convert
dlen      Length of string
col       Starting column to convert
fromcode  Original encoding
tocode    New encoding
encoder   Encoding function (set by this call)
wlen      Length of the encoded word (set by this call)
Return values
num  Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 307 of file rfc2047.c.

309 {
310  const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
311 
312  size_t n = dlen;
313  while (true)
314  {
315  assert(n > 0);
316  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
317  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
318  break;
319  n = ((nn != 0) ? nn : n) - 1;
320  assert(n > 0);
321  if (utf8)
322  while ((n > 1) && CONTINUATION_BYTE(d[n]))
323  n--;
324  }
325  return n;
326 }
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:186
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out]  res         Buffer where the resulting string is appended
[in]   buf         Buffer with the input string
[in]   charset     Charset to use for the conversion
[in]   charsetlen  Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 337 of file rfc2047.c.

338 {
339  if (!charset)
340  return;
341  char end = charset[charsetlen];
342  charset[charsetlen] = '\0';
343  const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
344  mutt_ch_convert_string(&buf->data, charset, c_charset, MUTT_ICONV_HOOK_FROM);
345  charset[charsetlen] = end;
346  mutt_mb_filter_unprintable(&buf->data);
347  mutt_buffer_addstr(res, buf->data);
348  FREE(&buf->data);
349  mutt_buffer_init(buf);
350 }
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:223
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:48
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:421
#define FREE(x)
Definition: memory.h:43
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:752
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
char * data
Pointer to data.
Definition: buffer.h:35
Container for Accounts, Notifications.
Definition: neomutt.h:37
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s    String to decode
len  Length of the string
enc  Encoding type
Return values
ptr  Decoded string
Note
The caller must free the returned string.

Definition at line 361 of file rfc2047.c.

362 {
363  const char *it = s;
364  const char *end = s + len;
365 
366  if (enc == ENC_QUOTED_PRINTABLE)
367  {
368  struct Buffer buf = mutt_buffer_make(0);
369  for (; it < end; it++)
370  {
371  if (*it == '_')
372  {
373  mutt_buffer_addch(&buf, ' ');
374  }
375  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
376  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
377  {
378  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
379  it += 2;
380  }
381  else
382  {
383  mutt_buffer_addch(&buf, *it);
384  }
385  }
386  mutt_buffer_addch(&buf, '\0');
387  return buf.data;
388  }
389  else if (enc == ENC_BASE64)
390  {
391  const int olen = 3 * len / 4 + 1;
392  char *out = mutt_mem_malloc(olen);
393  int dlen = mutt_b64_decode(it, out, olen);
394  if (dlen == -1)
395  {
396  FREE(&out);
397  return NULL;
398  }
399  out[dlen] = '\0';
400  return out;
401  }
402 
403  assert(0); /* The enc parameter has an invalid value */
404  return NULL;
405 }
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:63
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:238
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
#define hexval(ch)
Definition: mime.h:80
String manipulation buffer.
Definition: buffer.h:34
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const struct Slist *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in]   d         String to convert
[in]   dlen      Length of string
[in]   col       Starting column to convert
[in]   fromcode  Original encoding
[in]   charsets  List of allowable encodings (colon separated)
[out]  e         Encoded string
[out]  elen      Length of encoded string
[in]   specials  Special characters to be encoded
Return values
0  Success
1  The string could not be converted from fromcode to UTF-8
2  No target charset could be chosen from charsets

Definition at line 419 of file rfc2047.c.

421 {
422  int rc = 0;
423  char *buf = NULL;
424  size_t bufpos, buflen;
425  char *t0 = NULL, *t1 = NULL, *t = NULL;
426  char *s0 = NULL, *s1 = NULL;
427  size_t ulen, r, wlen = 0;
428  encoder_t encoder = NULL;
429  char *tocode1 = NULL;
430  const char *tocode = NULL;
431  const char *icode = "utf-8";
432 
433  /* Try to convert to UTF-8. */
434  char *u = mutt_strn_dup(d, dlen);
435  if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
436  {
437  rc = 1;
438  icode = 0;
439  }
440  ulen = mutt_str_len(u);
441 
442  /* Find earliest and latest things we must encode. */
443  s0 = 0;
444  s1 = 0;
445  t0 = 0;
446  t1 = 0;
447  for (t = u; t < (u + ulen); t++)
448  {
449  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
450  {
451  if (!t0)
452  t0 = t;
453  t1 = t;
454  }
455  else if (specials && *t && strchr(specials, *t))
456  {
457  if (!s0)
458  s0 = t;
459  s1 = t;
460  }
461  }
462 
463  /* If we have something to encode, include RFC822 specials */
464  if (t0 && s0 && (s0 < t0))
465  t0 = s0;
466  if (t1 && s1 && (s1 > t1))
467  t1 = s1;
468 
469  if (!t0)
470  {
471  /* No encoding is required. */
472  *e = u;
473  *elen = ulen;
474  return rc;
475  }
476 
477  /* Choose target charset. */
478  tocode = fromcode;
479  if (icode)
480  {
481  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
482  if (tocode1)
483  tocode = tocode1;
484  else
485  {
486  rc = 2;
487  icode = 0;
488  }
489  }
490 
491  /* Hack to avoid labelling 8-bit data as us-ascii. */
492  if (!icode && mutt_ch_is_us_ascii(tocode))
493  tocode = "unknown-8bit";
494 
495  /* Adjust t0 for maximum length of line. */
496  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
497  if (t < u)
498  t = u;
499  if (t < t0)
500  t0 = t;
501 
502  /* Adjust t0 until we can encode a character after a space. */
503  for (; t0 > u; t0--)
504  {
505  if (!HSPACE(*(t0 - 1)))
506  continue;
507  t = t0 + 1;
508  if (icode)
509  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
510  t++;
511  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
512  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
513  {
514  break;
515  }
516  }
517 
518  /* Adjust t1 until we can encode a character before a space. */
519  for (; t1 < (u + ulen); t1++)
520  {
521  if (!HSPACE(*t1))
522  continue;
523  t = t1 - 1;
524  if (icode)
525  while (CONTINUATION_BYTE(*t))
526  t--;
527  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
528  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
529  {
530  break;
531  }
532  }
533 
534  /* We shall encode the region [t0,t1). */
535 
536  /* Initialise the output buffer with the us-ascii prefix. */
537  buflen = 2 * ulen;
538  buf = mutt_mem_malloc(buflen);
539  bufpos = t0 - u;
540  memcpy(buf, u, t0 - u);
541 
542  col += t0 - u;
543 
544  t = t0;
545  while (true)
546  {
547  /* Find how much we can encode. */
548  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
549  if (n == (t1 - t))
550  {
551  /* See if we can fit the us-ascii suffix, too. */
552  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
553  break;
554  n = t1 - t - 1;
555  if (icode)
556  while (CONTINUATION_BYTE(t[n]))
557  n--;
558  if (n == 0)
559  {
560  /* This should only happen in the really stupid case where the
561  * only word that needs encoding is one character long, but
562  * there is too much us-ascii stuff after it to use a single
563  * encoded word. We add the next word to the encoded region
564  * and try again. */
565  assert(t1 < (u + ulen));
566  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
567  ; // do nothing
568 
569  continue;
570  }
571  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
572  }
573 
574  /* Add to output buffer. */
575  const char *line_break = "\n\t";
576  const int lb_len = 2; /* strlen(line_break) */
577 
578  if ((bufpos + wlen + lb_len) > buflen)
579  {
580  buflen = bufpos + wlen + lb_len;
581  mutt_mem_realloc(&buf, buflen);
582  }
583  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
584  assert(r == wlen);
585  bufpos += wlen;
586  memcpy(buf + bufpos, line_break, lb_len);
587  bufpos += lb_len;
588 
589  col = 1;
590 
591  t += n;
592  }
593 
594  /* Add last encoded word and us-ascii suffix to buffer. */
595  buflen = bufpos + wlen + (u + ulen - t1);
596  mutt_mem_realloc(&buf, buflen + 1);
597  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
598  assert(r == wlen);
599  bufpos += wlen;
600  memcpy(buf + bufpos, t1, u + ulen - t1);
601 
602  FREE(&tocode1);
603  FREE(&u);
604 
605  buf[buflen] = '\0';
606 
607  *e = buf;
608  *elen = buflen + 1;
609  return rc;
610 }
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1035
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:96
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:428
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:544
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Definition: rfc2047.c:64
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:307
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:269
#define HSPACE(ch)
Definition: rfc2047.c:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const struct Slist *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out]  pd        String to be encoded, and resulting encoded string
[in]      specials  Special characters to be encoded
[in]      col       Starting index in string
[in]      charsets  List of charsets to choose from

Definition at line 619 of file rfc2047.c.

620 {
621  const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
622  if (!c_charset || !pd || !*pd)
623  return;
624 
625  struct Slist *fallback = NULL;
626  if (!charsets)
627  {
628  fallback = slist_parse("utf-8", SLIST_SEP_COLON);
629  charsets = fallback;
630  }
631 
632  char *e = NULL;
633  size_t elen = 0;
634  encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
635 
636  slist_free(&fallback);
637  FREE(pd);
638  *pd = e;
639 }
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:200
void slist_free(struct Slist **list)
Free an Slist object.
Definition: slist.c:162
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:419
#define SLIST_SEP_COLON
Definition: slist.h:35
String list.
Definition: slist.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out]  pd  String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 649 of file rfc2047.c.

650 {
651  if (!pd || !*pd)
652  return;
653 
654  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
655  char *s = *pd; /* Read pointer */
656  char *beg = NULL; /* Begin of encoded word */
657  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
658  char *charset = NULL; /* Which charset */
659  size_t charsetlen; /* Length of the charset */
660  char *text = NULL; /* Encoded text */
661  size_t textlen; /* Length of encoded text */
662 
663  /* Keep some state in case the next decoded word is using the same charset
664  * and it happens to be split in the middle of a multibyte character.
665  * See https://github.com/neomutt/neomutt/issues/1015 */
666  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
667  char *prev_charset = NULL; /* Previously used charset */
668  size_t prev_charsetlen = 0; /* Length of the previously used charset */
669 
670  while (*s)
671  {
672  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
673  if (beg != s)
674  {
675  /* Some non-encoded text was found */
676  size_t holelen = beg ? beg - s : mutt_str_len(s);
677 
678  /* Ignore whitespace between encoded words */
679  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
680  {
681  s = beg;
682  continue;
683  }
684 
685  /* If we have some previously decoded text, add it now */
686  if (!mutt_buffer_is_empty(&prev))
687  {
688  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
689  }
690 
691  /* Add non-encoded part */
692  {
693  const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
694  if (c_assumed_charset)
695  {
696  char *conv = mutt_strn_dup(s, holelen);
697  mutt_ch_convert_nonmime_string(&conv);
698  mutt_buffer_addstr(&buf, conv);
699  FREE(&conv);
700  }
701  else
702  {
703  mutt_buffer_addstr_n(&buf, s, holelen);
704  }
705  }
706  s += holelen;
707  }
708  if (beg)
709  {
710  /* Some encoded text was found */
711  text[textlen] = '\0';
712  char *decoded = decode_word(text, textlen, enc);
713  if (!decoded)
714  {
715  mutt_buffer_dealloc(&buf);
716  return;
717  }
718  if (prev.data && ((prev_charsetlen != charsetlen) ||
719  !mutt_strn_equal(prev_charset, charset, charsetlen)))
720  {
721  /* Different charset, convert the previous chunk and add it to the
722  * final result */
723  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
724  }
725 
726  mutt_buffer_addstr(&prev, decoded);
727  FREE(&decoded);
728  prev_charset = charset;
729  prev_charsetlen = charsetlen;
730  s = text + textlen + 2; /* Skip final ?= */
731  }
732  }
733 
734  /* Save the last chunk */
735  if (prev.data)
736  {
737  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
738  }
739 
740  mutt_buffer_addch(&buf, '\0');
741  FREE(pd);
742  *pd = buf.data;
743 }
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:250
void mutt_buffer_dealloc(struct Buffer *buf)
Release the memory allocated by a buffer.
Definition: buffer.c:292
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:101
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:268
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:47
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:307
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:686
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:473
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:143
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:361
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:337
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al   AddressList
tag  Header tag (used for wrapping calculation)

Definition at line 750 of file rfc2047.c.

751 {
752  if (!al)
753  return;
754 
755  int col = tag ? strlen(tag) + 2 : 32;
756  struct Address *a = NULL;
757  TAILQ_FOREACH(a, al, entries)
758  {
759  const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
760  if (a->personal)
761  rfc2047_encode(&a->personal, AddressSpecials, col, c_send_charset);
762  else if (a->group && a->mailbox)
763  rfc2047_encode(&a->mailbox, AddressSpecials, col, c_send_charset);
764  }
765 }
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:619
An email address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:39
char * mailbox
Mailbox and host address.
Definition: address.h:38
char * personal
Real name of address.
Definition: address.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al  AddressList

Definition at line 771 of file rfc2047.c.

772 {
773  if (!al)
774  return;
775 
776  struct Address *a = NULL;
777  TAILQ_FOREACH(a, al, entries)
778  {
779  const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
780  if (a->personal && ((strstr(a->personal, "=?")) || c_assumed_charset))
781  {
782  rfc2047_decode(&a->personal);
783  }
784  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
785  rfc2047_decode(&a->mailbox);
786  }
787 }
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:649
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 793 of file rfc2047.c.

794 {
795  if (!env)
796  return;
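797  rfc2047_decode_addrlist(&env->from);
798  rfc2047_decode_addrlist(&env->to);
799  rfc2047_decode_addrlist(&env->cc);
800  rfc2047_decode_addrlist(&env->bcc);
801  rfc2047_decode_addrlist(&env->reply_to);
802  rfc2047_decode_addrlist(&env->mail_followup_to);
803  rfc2047_decode_addrlist(&env->return_path);
804  rfc2047_decode_addrlist(&env->sender);
805  rfc2047_decode(&env->x_label);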
806  rfc2047_decode(&env->subject);
807 }
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:771
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:58
struct AddressList to
Email's 'To' list.
Definition: envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:61
struct AddressList sender
Email's sender.
Definition: envelope.h:63
char * subject
Email's subject.
Definition: envelope.h:70
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:62
char * x_label
X-Label.
Definition: envelope.h:76
struct AddressList from
Email's 'From' list.
Definition: envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
envEnvelope

Definition at line 813 of file rfc2047.c.

814 {
815  if (!env)
816  return;
817  rfc2047_encode_addrlist(&env->from, "From");
818  rfc2047_encode_addrlist(&env->to, "To");
819  rfc2047_encode_addrlist(&env->cc, "Cc");
820  rfc2047_encode_addrlist(&env->bcc, "Bcc");
821  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
822  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
823  rfc2047_encode_addrlist(&env->sender, "Sender");
824  const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
825  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
826  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), c_send_charset);
827 }
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:750
+ Here is the call graph for this function:
+ Here is the caller graph for this function: