NeoMutt  2021-02-05-89-gabe350
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference
#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "globals.h"
#include "mime.h"
#include "mutt_globals.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 Prototype for an encoding function. More...
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const char *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

/* Longest encoded word this file will produce; RFC 2047 section 2 limits an encoded-word to 75 characters */
#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

/* Length of an encoded word whose charset, encoding and text fields are one character each */
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

/* True for a space, a tab, or the terminating NUL (treated as a word boundary) */
#define HSPACE(ch) (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

/* True when the top two bits of ch are 10, i.e. ch is a UTF-8 continuation byte */
#define CONTINUATION_BYTE(ch) (((ch) & 0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

/**
 * encoder_t - Prototype for an encoding function
 * @param str    Output buffer that receives the encoded word
 * @param buf    Data to encode
 * @param buflen Number of bytes in buf
 * @param tocode Charset name placed in the encoded word
 * @retval num Bytes written to str
 */
typedef size_t (*encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode);

Prototype for an encoding function.

Parameters
  str     Output buffer that receives the encoded word
  buf     Data to encode
  buflen  Length of the data in buf
  tocode  Character encoding
Return values
  num  Bytes written to str

Definition at line 61 of file rfc2047.c.

Function Documentation

◆ b_encoder()

static size_t b_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Base64 Encode a string - Implements encoder_t.

Definition at line 66 of file rfc2047.c.

/**
 * b_encoder - Base64 Encode a string - Implements ::encoder_t
 * @param str    Output buffer; receives "=?" tocode "?B?" base64-data "?="
 * @param buf    Data to encode
 * @param buflen Number of bytes in buf
 * @param tocode Charset name written into the encoded word
 * @retval num Bytes written to str
 *
 * @note No terminating NUL is written to str.
 */
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
{
  const size_t cslen = strlen(tocode);
  char *out = str;

  /* Header: "=?" charset "?B?" */
  memcpy(out, "=?", 2);
  out += 2;
  memcpy(out, tocode, cslen);
  out += cslen;
  memcpy(out, "?B?", 3);
  out += 3;

  /* Payload: hand the encoder at most three input bytes per call */
  for (size_t take = 0; buflen > 0; buflen -= take, buf += take)
  {
    char quad[11];
    take = (buflen < 3) ? buflen : 3;
    const size_t produced = mutt_b64_encode(buf, take, quad, sizeof(quad));
    memcpy(out, quad, produced);
    out += produced;
  }

  /* Trailer */
  memcpy(out, "?=", 2);
  out += 2;
  return out - str;
}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ q_encoder()

static size_t q_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Quoted-printable Encode a string - Implements encoder_t.

Definition at line 99 of file rfc2047.c.

100 {
101  static const char hex[] = "0123456789ABCDEF";
102  char *s0 = str;
103 
104  memcpy(str, "=?", 2);
105  str += 2;
106  memcpy(str, tocode, strlen(tocode));
107  str += strlen(tocode);
108  memcpy(str, "?Q?", 3);
109  str += 3;
110  while (buflen--)
111  {
112  unsigned char c = *buf++;
113  if (c == ' ')
114  *str++ = '_';
115  else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
116  {
117  *str++ = '=';
118  *str++ = hex[(c & 0xf0) >> 4];
119  *str++ = hex[c & 0x0f];
120  }
121  else
122  *str++ = c;
123  }
124  memcpy(str, "?=", 2);
125  str += 2;
126  return str - s0;
127 }
+ Here is the caller graph for this function:

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in]strString to parse
[out]encContent encoding found in the first RFC2047 word
[out]charsetCharset found in the first RFC2047 word
[out]charsetlenLength of the charset string found
[out]textStart of the first RFC2047 encoded text
[out]textlenLength of the encoded text found
Return values
ptrStart of the RFC2047 encoded word
NULLNone was found

Definition at line 140 of file rfc2047.c.

142 {
143  regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
144  if (!match)
145  return NULL;
146 
147  const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
148  const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
149  const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
150  const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
151 
152  /* Charset */
153  *charset = str + mutt_regmatch_start(mcharset);
154  *charsetlen = mutt_regmatch_len(mcharset);
155 
156  /* Encoding: either Q or B */
157  *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
158 
159  *text = str + mutt_regmatch_start(mtext);
160  *textlen = mutt_regmatch_len(mtext);
161  return str + mutt_regmatch_start(mfull);
162 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
  d         String to convert
  dlen      Length of string
  fromcode  Original encoding
  tocode    New encoding
  encoder   [out] Encoding function chosen on success
  wlen      [out] Length of the resulting encoded word
Return values
  0   Success, string converted
  >0  Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 183 of file rfc2047.c.

185 {
186  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
187  const char *ib = NULL;
188  char *ob = NULL;
189  size_t ibl, obl;
190  int count, len, len_b, len_q;
191 
192  if (fromcode)
193  {
194  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
195  assert(cd != (iconv_t)(-1));
196  ib = d;
197  ibl = dlen;
198  ob = buf;
199  obl = sizeof(buf) - strlen(tocode);
200  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t)(-1)) ||
201  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t)(-1)))
202  {
203  assert(errno == E2BIG);
204  iconv_close(cd);
205  assert(ib > d);
206  return ((ib - d) == dlen) ? dlen : ib - d + 1;
207  }
208  iconv_close(cd);
209  }
210  else
211  {
212  if (dlen > (sizeof(buf) - strlen(tocode)))
213  return sizeof(buf) - strlen(tocode) + 1;
214  memcpy(buf, d, dlen);
215  ob = buf + dlen;
216  }
217 
218  count = 0;
219  for (char *p = buf; p < ob; p++)
220  {
221  unsigned char c = *p;
222  assert(strchr(MimeSpecials, '?'));
223  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
224  ((c != ' ') && strchr(MimeSpecials, *p)))
225  {
226  count++;
227  }
228  }
229 
230  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
231  len_b = len + (((ob - buf) + 2) / 3) * 4;
232  len_q = len + (ob - buf) + 2 * count;
233 
234  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
235  if (mutt_istr_equal(tocode, "ISO-2022-JP"))
236  len_q = ENCWORD_LEN_MAX + 1;
237 
238  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
239  {
240  *encoder = b_encoder;
241  *wlen = len_b;
242  return 0;
243  }
244  else if (len_q <= ENCWORD_LEN_MAX)
245  {
246  *encoder = q_encoder;
247  *wlen = len_q;
248  return 0;
249  }
250  else
251  return dlen;
252 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
  str       Output buffer that receives the encoded word
  buf       Data to encode
  buflen    Length of the data in buf
  fromcode  Original encoding
  tocode    New encoding
  encoder   Encoding function
Return values
  num  Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 266 of file rfc2047.c.

268 {
269  if (!fromcode)
270  {
271  return (*encoder)(str, buf, buflen, tocode);
272  }
273 
274  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
275  assert(cd != (iconv_t)(-1));
276  const char *ib = buf;
277  size_t ibl = buflen;
278  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
279  char *ob = tmp;
280  size_t obl = sizeof(tmp) - strlen(tocode);
281  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
282  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
283  assert(n1 != (size_t)(-1) && n2 != (size_t)(-1));
284  iconv_close(cd);
285  return (*encoder)(str, tmp, ob - tmp, tocode);
286 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
  d         String to convert
  dlen      Length of string
  col       Starting column to convert
  fromcode  Original encoding
  tocode    New encoding
  encoder   [out] Encoding function
  wlen      [out] Length of the encoded word
Return values
  num  Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 304 of file rfc2047.c.

306 {
307  const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
308 
309  size_t n = dlen;
310  while (true)
311  {
312  assert(n > 0);
313  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
314  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
315  break;
316  n = ((nn != 0) ? nn : n) - 1;
317  assert(n > 0);
318  if (utf8)
319  while ((n > 1) && CONTINUATION_BYTE(d[n]))
320  n--;
321  }
322  return n;
323 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
  [out]  res         Buffer where the resulting string is appended
  [in]   buf         Buffer with the input string
  [in]   charset     Charset to use for the conversion
  [in]   charsetlen  Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 334 of file rfc2047.c.

335 {
336  if (!charset)
337  return;
338  char end = charset[charsetlen];
339  charset[charsetlen] = '\0';
341  charset[charsetlen] = end;
343  mutt_buffer_addstr(res, buf->data);
344  FREE(&buf->data);
345  mutt_buffer_init(buf);
346 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
  s    String to decode
  len  Length of the string
  enc  Encoding type
Return values
  ptr  Decoded string
Note
  The caller must free the returned string

Definition at line 357 of file rfc2047.c.

358 {
359  const char *it = s;
360  const char *end = s + len;
361 
362  if (enc == ENC_QUOTED_PRINTABLE)
363  {
364  struct Buffer buf = mutt_buffer_make(0);
365  for (; it < end; it++)
366  {
367  if (*it == '_')
368  {
369  mutt_buffer_addch(&buf, ' ');
370  }
371  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
372  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
373  {
374  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
375  it += 2;
376  }
377  else
378  {
379  mutt_buffer_addch(&buf, *it);
380  }
381  }
382  mutt_buffer_addch(&buf, '\0');
383  return buf.data;
384  }
385  else if (enc == ENC_BASE64)
386  {
387  const int olen = 3 * len / 4 + 1;
388  char *out = mutt_mem_malloc(olen);
389  int dlen = mutt_b64_decode(it, out, olen);
390  if (dlen == -1)
391  {
392  FREE(&out);
393  return NULL;
394  }
395  out[dlen] = '\0';
396  return out;
397  }
398 
399  assert(0); /* The enc parameter has an invalid value */
400  return NULL;
401 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
  [in]   d         String to convert
  [in]   dlen      Length of string
  [in]   col       Starting column to convert
  [in]   fromcode  Original encoding
  [in]   charsets  List of allowable encodings (colon separated)
  [out]  e         Encoded string
  [out]  elen      Length of encoded string
  [in]   specials  Special characters to be encoded
Return values
  0  Success

Definition at line 415 of file rfc2047.c.

/*
 * encode - RFC2047-encode a string.
 *
 * The input (d, dlen) is duplicated and converted to UTF-8.  The region that
 * needs encoding is located, a target charset is picked, and that region is
 * emitted as one or more encoded words separated by "\n\t".  The unencoded
 * text before and after the region is copied through unchanged.
 *
 * Return value:
 *   0  success
 *   1  the conversion to UTF-8 failed; the data is encoded without further
 *      charset conversion
 *   2  mutt_ch_choose() could not pick a charset from `charsets`
 *
 * *e always receives a newly allocated string.  *elen is set to the string
 * length when nothing had to be encoded, and to the length plus one (counting
 * the terminating NUL) when encoded words were produced.
 */
417 {
418  int rc = 0;
419  char *buf = NULL;
420  size_t bufpos, buflen;
421  char *t0 = NULL, *t1 = NULL, *t = NULL;
422  char *s0 = NULL, *s1 = NULL;
423  size_t ulen, r, wlen = 0;
424  encoder_t encoder = NULL;
425  char *tocode1 = NULL;
426  const char *tocode = NULL;
427  const char *icode = "utf-8";
428 
429  /* Try to convert to UTF-8. */
430  char *u = mutt_strn_dup(d, dlen);
431  if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
432  {
433  rc = 1;
434  icode = 0;
435  }
436  ulen = mutt_str_len(u);
437 
438  /* Find earliest and latest things we must encode. */
/* t0/t1: first/last byte that is 8-bit or starts something looking like an
 * encoded word ("=?" at a word start); s0/s1: first/last byte from `specials` */
439  s0 = 0;
440  s1 = 0;
441  t0 = 0;
442  t1 = 0;
443  for (t = u; t < (u + ulen); t++)
444  {
445  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
446  {
447  if (!t0)
448  t0 = t;
449  t1 = t;
450  }
451  else if (specials && *t && strchr(specials, *t))
452  {
453  if (!s0)
454  s0 = t;
455  s1 = t;
456  }
457  }
458 
459  /* If we have something to encode, include RFC822 specials */
460  if (t0 && s0 && (s0 < t0))
461  t0 = s0;
462  if (t1 && s1 && (s1 > t1))
463  t1 = s1;
464 
465  if (!t0)
466  {
467  /* No encoding is required. */
/* Ownership of u passes to the caller through *e */
468  *e = u;
469  *elen = ulen;
470  return rc;
471  }
472 
473  /* Choose target charset. */
474  tocode = fromcode;
475  if (icode)
476  {
477  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
478  if (tocode1)
479  tocode = tocode1;
480  else
481  {
482  rc = 2;
483  icode = 0;
484  }
485  }
486 
487  /* Hack to avoid labelling 8-bit data as us-ascii. */
488  if (!icode && mutt_ch_is_us_ascii(tocode))
489  tocode = "unknown-8bit";
490 
491  /* Adjust t0 for maximum length of line. */
492  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
493  if (t < u)
494  t = u;
495  if (t < t0)
496  t0 = t;
497 
498  /* Adjust t0 until we can encode a character after a space. */
499  for (; t0 > u; t0--)
500  {
501  if (!HSPACE(*(t0 - 1)))
502  continue;
503  t = t0 + 1;
504  if (icode)
505  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
506  t++;
507  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
508  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
509  {
510  break;
511  }
512  }
513 
514  /* Adjust t1 until we can encode a character before a space. */
515  for (; t1 < (u + ulen); t1++)
516  {
517  if (!HSPACE(*t1))
518  continue;
519  t = t1 - 1;
520  if (icode)
521  while (CONTINUATION_BYTE(*t))
522  t--;
523  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
524  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
525  {
526  break;
527  }
528  }
529 
530  /* We shall encode the region [t0,t1). */
531 
532  /* Initialise the output buffer with the us-ascii prefix. */
533  buflen = 2 * ulen;
534  buf = mutt_mem_malloc(buflen);
535  bufpos = t0 - u;
536  memcpy(buf, u, t0 - u);
537 
538  col += t0 - u;
539 
/* Each pass emits one encoded word followed by a line break; the loop is
 * left once the remainder [t,t1) plus the plain suffix fits in one word */
540  t = t0;
541  while (true)
542  {
543  /* Find how much we can encode. */
544  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
545  if (n == (t1 - t))
546  {
547  /* See if we can fit the us-ascii suffix, too. */
548  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
549  break;
550  n = t1 - t - 1;
551  if (icode)
552  while (CONTINUATION_BYTE(t[n]))
553  n--;
554  if (n == 0)
555  {
556  /* This should only happen in the really stupid case where the
557  * only word that needs encoding is one character long, but
558  * there is too much us-ascii stuff after it to use a single
559  * encoded word. We add the next word to the encoded region
560  * and try again. */
561  assert(t1 < (u + ulen));
562  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
563  ; // do nothing
564 
565  continue;
566  }
567  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
568  }
569 
570  /* Add to output buffer. */
571  const char *line_break = "\n\t";
572  const int lb_len = 2; /* strlen(line_break) */
573 
574  if ((bufpos + wlen + lb_len) > buflen)
575  {
576  buflen = bufpos + wlen + lb_len;
577  mutt_mem_realloc(&buf, buflen);
578  }
579  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
580  assert(r == wlen);
581  bufpos += wlen;
582  memcpy(buf + bufpos, line_break, lb_len);
583  bufpos += lb_len;
584 
/* The next word starts after the tab of the line break */
585  col = 1;
586 
587  t += n;
588  }
589 
590  /* Add last encoded word and us-ascii suffix to buffer. */
591  buflen = bufpos + wlen + (u + ulen - t1);
592  mutt_mem_realloc(&buf, buflen + 1);
593  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
594  assert(r == wlen);
595  bufpos += wlen;
596  memcpy(buf + bufpos, t1, u + ulen - t1);
597 
598  FREE(&tocode1);
599  FREE(&u);
600 
601  buf[buflen] = '\0';
602 
603  *e = buf;
604  *elen = buflen + 1;
605  return rc;
606 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const char *  charsets 
)

RFC-2047-encode a string.

Parameters
  [in,out]  pd        String to be encoded, and resulting encoded string
  [in]      specials  Special characters to be encoded
  [in]      col       Starting index in string
  [in]      charsets  List of charsets to choose from

Definition at line 615 of file rfc2047.c.

616 {
617  if (!C_Charset || !pd || !*pd)
618  return;
619 
620  if (!charsets)
621  charsets = "utf-8";
622 
623  char *e = NULL;
624  size_t elen = 0;
625  encode(*pd, strlen(*pd), col, C_Charset, charsets, &e, &elen, specials);
626 
627  FREE(pd);
628  *pd = e;
629 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
  [in,out]  pd  String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 639 of file rfc2047.c.

640 {
641  if (!pd || !*pd)
642  return;
643 
644  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
645  char *s = *pd; /* Read pointer */
646  char *beg = NULL; /* Begin of encoded word */
647  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
648  char *charset = NULL; /* Which charset */
649  size_t charsetlen; /* Length of the charset */
650  char *text = NULL; /* Encoded text */
651  size_t textlen; /* Length of encoded text */
652 
653  /* Keep some state in case the next decoded word is using the same charset
654  * and it happens to be split in the middle of a multibyte character.
655  * See https://github.com/neomutt/neomutt/issues/1015 */
656  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
657  char *prev_charset = NULL; /* Previously used charset */
658  size_t prev_charsetlen = 0; /* Length of the previously used charset */
659 
660  while (*s)
661  {
662  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
663  if (beg != s)
664  {
665  /* Some non-encoded text was found */
666  size_t holelen = beg ? beg - s : mutt_str_len(s);
667 
668  /* Ignore whitespace between encoded words */
669  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
670  {
671  s = beg;
672  continue;
673  }
674 
675  /* If we have some previously decoded text, add it now */
676  if (!mutt_buffer_is_empty(&prev))
677  {
678  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
679  }
680 
681  /* Add non-encoded part */
682  {
683  if (C_AssumedCharset)
684  {
685  char *conv = mutt_strn_dup(s, holelen);
687  mutt_buffer_addstr(&buf, conv);
688  FREE(&conv);
689  }
690  else
691  {
692  mutt_buffer_addstr_n(&buf, s, holelen);
693  }
694  }
695  s += holelen;
696  }
697  if (beg)
698  {
699  /* Some encoded text was found */
700  text[textlen] = '\0';
701  char *decoded = decode_word(text, textlen, enc);
702  if (!decoded)
703  {
704  return;
705  }
706  if (prev.data && ((prev_charsetlen != charsetlen) ||
707  !mutt_strn_equal(prev_charset, charset, charsetlen)))
708  {
709  /* Different charset, convert the previous chunk and add it to the
710  * final result */
711  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
712  }
713 
714  mutt_buffer_addstr(&prev, decoded);
715  FREE(&decoded);
716  prev_charset = charset;
717  prev_charsetlen = charsetlen;
718  s = text + textlen + 2; /* Skip final ?= */
719  }
720  }
721 
722  /* Save the last chunk */
723  if (prev.data)
724  {
725  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
726  }
727 
728  mutt_buffer_addch(&buf, '\0');
729  FREE(pd);
730  *pd = buf.data;
731 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
  al   AddressList
  tag  Header tag (used for wrapping calculation)

Definition at line 738 of file rfc2047.c.

739 {
740  if (!al)
741  return;
742 
743  int col = tag ? strlen(tag) + 2 : 32;
744  struct Address *a = NULL;
745  TAILQ_FOREACH(a, al, entries)
746  {
747  if (a->personal)
749  else if (a->group && a->mailbox)
751  }
752 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
  al  AddressList

Definition at line 758 of file rfc2047.c.

759 {
760  if (!al)
761  return;
762 
763  struct Address *a = NULL;
764  TAILQ_FOREACH(a, al, entries)
765  {
766  if (a->personal && ((strstr(a->personal, "=?")) || C_AssumedCharset))
767  {
769  }
770  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
771  rfc2047_decode(&a->mailbox);
772  }
773 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
  env  Envelope

Definition at line 779 of file rfc2047.c.

780 {
781  if (!env)
782  return;
791  rfc2047_decode(&env->x_label);
792  rfc2047_decode(&env->subject);
793 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
  env  Envelope

Definition at line 799 of file rfc2047.c.

800 {
801  if (!env)
802  return;
803  rfc2047_encode_addrlist(&env->from, "From");
804  rfc2047_encode_addrlist(&env->to, "To");
805  rfc2047_encode_addrlist(&env->cc, "Cc");
806  rfc2047_encode_addrlist(&env->bcc, "Bcc");
807  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
808  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
809  rfc2047_encode_addrlist(&env->sender, "Sender");
810  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), C_SendCharset);
811  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), C_SendCharset);
812 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
Envelope::subject
char * subject
Email's subject.
Definition: envelope.h:66
Envelope::bcc
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
ENC_QUOTED_PRINTABLE
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
CONTINUATION_BYTE
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
mutt_mb_filter_unprintable
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:424
q_encoder
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition: rfc2047.c:99
choose_block
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:304
mutt_strn_equal
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:593
Address::personal
char * personal
Real name of address.
Definition: address.h:36
Buffer
String manipulation buffer.
Definition: buffer.h:33
PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:97
mutt_buffer_is_empty
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:252
decode_word
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:357
mutt_b64_decode
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
MUTT_ICONV_NO_FLAGS
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:73
Envelope::x_label
char * x_label
X-Label.
Definition: envelope.h:72
encoder_t
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Prototype for an encoding function.
Definition: rfc2047.c:61
parse_encoded_word
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:140
ContentEncoding
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:46
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:718
mutt_buffer_init
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:46
PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:95
FREE
#define FREE(x)
Definition: memory.h:40
rfc2047_decode_addrlist
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:758
MimeSpecials
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
mutt_ch_convert_string
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:754
mutt_buffer_addstr_n
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:99
try_block
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:183
mutt_ch_iconv_open
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:565
mutt_istr_equal
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:883
rfc2047_encode_addrlist
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:738
Envelope::reply_to
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
AddressSpecials
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
PREX_RFC2047_ENCODED_WORD_MATCH_FULL
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:94
MUTT_ICONV_HOOK_FROM
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:74
Envelope::cc
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
mutt_buffer_addch
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
mutt_regmatch_start
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:61
PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:96
mutt_b64_encode
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
Definition: base64.c:88
mutt_regmatch_len
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:81
rfc2047_decode
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:639
Address::group
bool group
Group mailbox?
Definition: address.h:38
encode_block
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:266
rfc2047_encode
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:615
mutt_ch_is_us_ascii
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:98
Envelope::to
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
mutt_mem_realloc
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
HSPACE
#define HSPACE(ch)
Definition: rfc2047.c:49
mutt_ch_convert_nonmime_string
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:309
C_SendCharset
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: globals.c:38
mutt_str_len
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:631
b_encoder
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition: rfc2047.c:66
Address::mailbox
char * mailbox
Mailbox and host address.
Definition: address.h:37
ENCWORD_LEN_MAX
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
Envelope::from
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
ENC_BASE64
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
mutt_mem_malloc
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
mutt_strn_dup
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:548
C_AssumedCharset
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:52
PREX_RFC2047_ENCODED_WORD
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
hexval
#define hexval(ch)
Definition: mime.h:80
ENCWORD_LEN_MIN
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
mutt_buffer_addstr
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
Envelope::return_path
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:56
finalize_chunk
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:334
C_Charset
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:53
Buffer::data
char * data
Pointer to data.
Definition: buffer.h:35
Envelope::mail_followup_to
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
mutt_buffer_make
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
Envelope::sender
struct AddressList sender
Email's sender.
Definition: envelope.h:61
encode
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:415
Address
An email address.
Definition: address.h:34
mutt_ch_choose
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure out the best charset to encode a string.
Definition: charset.c:1036
mutt_str_lws_len
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:773
MIN
#define MIN(a, b)
Definition: memory.h:31
mutt_prex_capture
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)