NeoMutt  2021-02-05-666-ge300cd
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 Prototype for an encoding function. More...
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const char *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) &0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

Prototype for an encoding function.

Parameters
str: Output buffer that receives the encoded word
buf: Data to encode
buflen: Length of the data in buf
tocode: Character encoding (charset name written into the encoded word)
Return values
num: Bytes written to str

Definition at line 61 of file rfc2047.c.

Function Documentation

◆ b_encoder()

static size_t b_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Base64 Encode a string - Implements encoder_t.

Definition at line 66 of file rfc2047.c.

67 {
68  char *s0 = str;
69 
70  memcpy(str, "=?", 2);
71  str += 2;
72  memcpy(str, tocode, strlen(tocode));
73  str += strlen(tocode);
74  memcpy(str, "?B?", 3);
75  str += 3;
76 
77  while (buflen)
78  {
79  char encoded[11];
80  size_t ret;
81  size_t in_len = MIN(3, buflen);
82 
83  ret = mutt_b64_encode(buf, in_len, encoded, sizeof(encoded));
84  for (size_t i = 0; i < ret; i++)
85  *str++ = encoded[i];
86 
87  buflen -= in_len;
88  buf += in_len;
89  }
90 
91  memcpy(str, "?=", 2);
92  str += 2;
93  return str - s0;
94 }
#define MIN(a, b)
Definition: memory.h:31
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
Definition: base64.c:88
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ q_encoder()

static size_t q_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Quoted-printable Encode a string - Implements encoder_t.

Definition at line 99 of file rfc2047.c.

100 {
101  static const char hex[] = "0123456789ABCDEF";
102  char *s0 = str;
103 
104  memcpy(str, "=?", 2);
105  str += 2;
106  memcpy(str, tocode, strlen(tocode));
107  str += strlen(tocode);
108  memcpy(str, "?Q?", 3);
109  str += 3;
110  while (buflen--)
111  {
112  unsigned char c = *buf++;
113  if (c == ' ')
114  *str++ = '_';
115  else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
116  {
117  *str++ = '=';
118  *str++ = hex[(c & 0xf0) >> 4];
119  *str++ = hex[c & 0x0f];
120  }
121  else
122  *str++ = c;
123  }
124  memcpy(str, "?=", 2);
125  str += 2;
126  return str - s0;
127 }
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
+ Here is the caller graph for this function:

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str: String to parse
[out] enc: Content encoding found in the first RFC2047 word
[out] charset: Charset found in the first RFC2047 word
[out] charsetlen: Length of the charset string found
[out] text: Start of the first RFC2047 encoded text
[out] textlen: Length of the encoded text found
Return values
ptr: Start of the RFC2047 encoded word
NULL: None was found

Definition at line 140 of file rfc2047.c.

142 {
143  regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
144  if (!match)
145  return NULL;
146 
147  const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
148  const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
149  const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
150  const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
151 
152  /* Charset */
153  *charset = str + mutt_regmatch_start(mcharset);
154  *charsetlen = mutt_regmatch_len(mcharset);
155 
156  /* Encoding: either Q or B */
157  *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
158 
159  *text = str + mutt_regmatch_start(mtext);
160  *textlen = mutt_regmatch_len(mtext);
161  return str + mutt_regmatch_start(mfull);
162 }
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:95
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:94
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:96
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:81
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:97
Base-64 encoded text.
Definition: mime.h:52
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:301
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:61
Quoted-printable text.
Definition: mime.h:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d: String to convert
dlen: Length of string
fromcode: Original encoding
tocode: New encoding
encoder: Set to the chosen encoding function on success
wlen: Set to the length of the encoded word on success
Return values
0: Success, string converted
>0: Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 183 of file rfc2047.c.

185 {
186  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
187  const char *ib = NULL;
188  char *ob = NULL;
189  size_t ibl, obl;
190  int count, len, len_b, len_q;
191 
192  if (fromcode)
193  {
194  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
195  assert(cd != (iconv_t) (-1));
196  ib = d;
197  ibl = dlen;
198  ob = buf;
199  obl = sizeof(buf) - strlen(tocode);
200  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t) (-1)) ||
201  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t) (-1)))
202  {
203  assert(errno == E2BIG);
204  iconv_close(cd);
205  assert(ib > d);
206  return ((ib - d) == dlen) ? dlen : ib - d + 1;
207  }
208  iconv_close(cd);
209  }
210  else
211  {
212  if (dlen > (sizeof(buf) - strlen(tocode)))
213  return sizeof(buf) - strlen(tocode) + 1;
214  memcpy(buf, d, dlen);
215  ob = buf + dlen;
216  }
217 
218  count = 0;
219  for (char *p = buf; p < ob; p++)
220  {
221  unsigned char c = *p;
222  assert(strchr(MimeSpecials, '?'));
223  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
224  ((c != ' ') && strchr(MimeSpecials, *p)))
225  {
226  count++;
227  }
228  }
229 
230  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
231  len_b = len + (((ob - buf) + 2) / 3) * 4;
232  len_q = len + (ob - buf) + 2 * count;
233 
234  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
235  if (mutt_istr_equal(tocode, "ISO-2022-JP"))
236  len_q = ENCWORD_LEN_MAX + 1;
237 
238  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
239  {
240  *encoder = b_encoder;
241  *wlen = len_b;
242  return 0;
243  }
244  else if (len_q <= ENCWORD_LEN_MAX)
245  {
246  *encoder = q_encoder;
247  *wlen = len_q;
248  return 0;
249  }
250  else
251  return dlen;
252 }
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition: rfc2047.c:99
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:569
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:916
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition: rfc2047.c:66
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str: Buffer for the resulting encoded word
buf: Data to encode
buflen: Length of the data in buf
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function
Return values
num: Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 266 of file rfc2047.c.

268 {
269  if (!fromcode)
270  {
271  return (*encoder)(str, buf, buflen, tocode);
272  }
273 
274  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
275  assert(cd != (iconv_t) (-1));
276  const char *ib = buf;
277  size_t ibl = buflen;
278  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
279  char *ob = tmp;
280  size_t obl = sizeof(tmp) - strlen(tocode);
281  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
282  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
283  assert(n1 != (size_t) (-1) && n2 != (size_t) (-1));
284  iconv_close(cd);
285  return (*encoder)(str, tmp, ob - tmp, tocode);
286 }
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:569
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d: String to convert
dlen: Length of string
col: Starting column to convert
fromcode: Original encoding
tocode: New encoding
encoder: Set to the chosen encoding function
wlen: Set to the length of the encoded word
Return values
num: Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 304 of file rfc2047.c.

306 {
307  const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
308 
309  size_t n = dlen;
310  while (true)
311  {
312  assert(n > 0);
313  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
314  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
315  break;
316  n = ((nn != 0) ? nn : n) - 1;
317  assert(n > 0);
318  if (utf8)
319  while ((n > 1) && CONTINUATION_BYTE(d[n]))
320  n--;
321  }
322  return n;
323 }
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:183
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:916
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer res,
struct Buffer buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res: Buffer where the resulting string is appended
[in] buf: Buffer with the input string
[in] charset: Charset to use for the conversion
[in] charsetlen: Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 334 of file rfc2047.c.

335 {
336  if (!charset)
337  return;
338  char end = charset[charsetlen];
339  charset[charsetlen] = '\0';
340  const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
341  mutt_ch_convert_string(&buf->data, charset, c_charset, MUTT_ICONV_HOOK_FROM);
342  charset[charsetlen] = end;
343  mutt_mb_filter_unprintable(&buf->data);
344  mutt_buffer_addstr(res, buf->data);
345  FREE(&buf->data);
346  mutt_buffer_init(buf);
347 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:758
Container for Accounts, Notifications.
Definition: neomutt.h:36
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
char * data
Pointer to data.
Definition: buffer.h:35
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
#define FREE(x)
Definition: memory.h:40
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s: String to decode
len: Length of the string
enc: Encoding type
Return values
ptr: Decoded string
NULL: Base64 decoding failed
Note
The caller must free the returned string

Definition at line 358 of file rfc2047.c.

359 {
360  const char *it = s;
361  const char *end = s + len;
362 
363  if (enc == ENC_QUOTED_PRINTABLE)
364  {
365  struct Buffer buf = mutt_buffer_make(0);
366  for (; it < end; it++)
367  {
368  if (*it == '_')
369  {
370  mutt_buffer_addch(&buf, ' ');
371  }
372  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
373  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
374  {
375  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
376  it += 2;
377  }
378  else
379  {
380  mutt_buffer_addch(&buf, *it);
381  }
382  }
383  mutt_buffer_addch(&buf, '\0');
384  return buf.data;
385  }
386  else if (enc == ENC_BASE64)
387  {
388  const int olen = 3 * len / 4 + 1;
389  char *out = mutt_mem_malloc(olen);
390  int dlen = mutt_b64_decode(it, out, olen);
391  if (dlen == -1)
392  {
393  FREE(&out);
394  return NULL;
395  }
396  out[dlen] = '\0';
397  return out;
398  }
399 
400  assert(0); /* The enc parameter has an invalid value */
401  return NULL;
402 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
Base-64 encoded text.
Definition: mime.h:52
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
char * data
Pointer to data.
Definition: buffer.h:35
#define hexval(ch)
Definition: mime.h:80
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
Quoted-printable text.
Definition: mime.h:51
#define FREE(x)
Definition: memory.h:40
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] charsets: List of allowable encodings (colon separated)
[out] e: Encoded string
[out] elen: Length of encoded string
[in] specials: Special characters to be encoded
Return values
0: Success
1: The string could not be converted from fromcode to UTF-8
2: No target charset could be chosen from charsets

Definition at line 416 of file rfc2047.c.

418 {
419  int rc = 0;
420  char *buf = NULL;
421  size_t bufpos, buflen;
422  char *t0 = NULL, *t1 = NULL, *t = NULL;
423  char *s0 = NULL, *s1 = NULL;
424  size_t ulen, r, wlen = 0;
425  encoder_t encoder = NULL;
426  char *tocode1 = NULL;
427  const char *tocode = NULL;
428  const char *icode = "utf-8";
429 
430  /* Try to convert to UTF-8. */
431  char *u = mutt_strn_dup(d, dlen);
432  if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
433  {
434  rc = 1;
435  icode = 0;
436  }
437  ulen = mutt_str_len(u);
438 
439  /* Find earliest and latest things we must encode. */
440  s0 = 0;
441  s1 = 0;
442  t0 = 0;
443  t1 = 0;
444  for (t = u; t < (u + ulen); t++)
445  {
446  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
447  {
448  if (!t0)
449  t0 = t;
450  t1 = t;
451  }
452  else if (specials && *t && strchr(specials, *t))
453  {
454  if (!s0)
455  s0 = t;
456  s1 = t;
457  }
458  }
459 
460  /* If we have something to encode, include RFC822 specials */
461  if (t0 && s0 && (s0 < t0))
462  t0 = s0;
463  if (t1 && s1 && (s1 > t1))
464  t1 = s1;
465 
466  if (!t0)
467  {
468  /* No encoding is required. */
469  *e = u;
470  *elen = ulen;
471  return rc;
472  }
473 
474  /* Choose target charset. */
475  tocode = fromcode;
476  if (icode)
477  {
478  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
479  if (tocode1)
480  tocode = tocode1;
481  else
482  {
483  rc = 2;
484  icode = 0;
485  }
486  }
487 
488  /* Hack to avoid labelling 8-bit data as us-ascii. */
489  if (!icode && mutt_ch_is_us_ascii(tocode))
490  tocode = "unknown-8bit";
491 
492  /* Adjust t0 for maximum length of line. */
493  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
494  if (t < u)
495  t = u;
496  if (t < t0)
497  t0 = t;
498 
499  /* Adjust t0 until we can encode a character after a space. */
500  for (; t0 > u; t0--)
501  {
502  if (!HSPACE(*(t0 - 1)))
503  continue;
504  t = t0 + 1;
505  if (icode)
506  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
507  t++;
508  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
509  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
510  {
511  break;
512  }
513  }
514 
515  /* Adjust t1 until we can encode a character before a space. */
516  for (; t1 < (u + ulen); t1++)
517  {
518  if (!HSPACE(*t1))
519  continue;
520  t = t1 - 1;
521  if (icode)
522  while (CONTINUATION_BYTE(*t))
523  t--;
524  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
525  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
526  {
527  break;
528  }
529  }
530 
531  /* We shall encode the region [t0,t1). */
532 
533  /* Initialise the output buffer with the us-ascii prefix. */
534  buflen = 2 * ulen;
535  buf = mutt_mem_malloc(buflen);
536  bufpos = t0 - u;
537  memcpy(buf, u, t0 - u);
538 
539  col += t0 - u;
540 
541  t = t0;
542  while (true)
543  {
544  /* Find how much we can encode. */
545  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
546  if (n == (t1 - t))
547  {
548  /* See if we can fit the us-ascii suffix, too. */
549  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
550  break;
551  n = t1 - t - 1;
552  if (icode)
553  while (CONTINUATION_BYTE(t[n]))
554  n--;
555  if (n == 0)
556  {
557  /* This should only happen in the really stupid case where the
558  * only word that needs encoding is one character long, but
559  * there is too much us-ascii stuff after it to use a single
560  * encoded word. We add the next word to the encoded region
561  * and try again. */
562  assert(t1 < (u + ulen));
563  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
564  ; // do nothing
565 
566  continue;
567  }
568  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
569  }
570 
571  /* Add to output buffer. */
572  const char *line_break = "\n\t";
573  const int lb_len = 2; /* strlen(line_break) */
574 
575  if ((bufpos + wlen + lb_len) > buflen)
576  {
577  buflen = bufpos + wlen + lb_len;
578  mutt_mem_realloc(&buf, buflen);
579  }
580  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
581  assert(r == wlen);
582  bufpos += wlen;
583  memcpy(buf + bufpos, line_break, lb_len);
584  bufpos += lb_len;
585 
586  col = 1;
587 
588  t += n;
589  }
590 
591  /* Add last encoded word and us-ascii suffix to buffer. */
592  buflen = bufpos + wlen + (u + ulen - t1);
593  mutt_mem_realloc(&buf, buflen + 1);
594  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
595  assert(r == wlen);
596  bufpos += wlen;
597  memcpy(buf + bufpos, t1, u + ulen - t1);
598 
599  FREE(&tocode1);
600  FREE(&u);
601 
602  buf[buflen] = '\0';
603 
604  *e = buf;
605  *elen = buflen + 1;
606  return rc;
607 }
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1040
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:758
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Prototype for an encoding function.
Definition: rfc2047.c:61
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:183
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:548
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
#define HSPACE(ch)
Definition: rfc2047.c:49
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:266
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:96
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:664
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define FREE(x)
Definition: memory.h:40
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:304
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const char *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out] pd: String to be encoded, and resulting encoded string
[in] specials: Special characters to be encoded
[in] col: Starting index in string
[in] charsets: List of charsets to choose from

Definition at line 616 of file rfc2047.c.

617 {
618  const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
619  if (!c_charset || !pd || !*pd)
620  return;
621 
622  if (!charsets)
623  charsets = "utf-8";
624 
625  char *e = NULL;
626  size_t elen = 0;
627  encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
628 
629  FREE(pd);
630  *pd = e;
631 }
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:416
Container for Accounts, Notifications.
Definition: neomutt.h:36
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
#define FREE(x)
Definition: memory.h:40
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd: String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 641 of file rfc2047.c.

642 {
643  if (!pd || !*pd)
644  return;
645 
646  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
647  char *s = *pd; /* Read pointer */
648  char *beg = NULL; /* Begin of encoded word */
649  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
650  char *charset = NULL; /* Which charset */
651  size_t charsetlen; /* Length of the charset */
652  char *text = NULL; /* Encoded text */
653  size_t textlen; /* Length of encoded text */
654 
655  /* Keep some state in case the next decoded word is using the same charset
656  * and it happens to be split in the middle of a multibyte character.
657  * See https://github.com/neomutt/neomutt/issues/1015 */
658  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
659  char *prev_charset = NULL; /* Previously used charset */
660  size_t prev_charsetlen = 0; /* Length of the previously used charset */
661 
662  while (*s)
663  {
664  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
665  if (beg != s)
666  {
667  /* Some non-encoded text was found */
668  size_t holelen = beg ? beg - s : mutt_str_len(s);
669 
670  /* Ignore whitespace between encoded words */
671  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
672  {
673  s = beg;
674  continue;
675  }
676 
677  /* If we have some previously decoded text, add it now */
678  if (!mutt_buffer_is_empty(&prev))
679  {
680  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
681  }
682 
683  /* Add non-encoded part */
684  {
685  const char *const c_assumed_charset =
686  cs_subset_string(NeoMutt->sub, "assumed_charset");
687  if (c_assumed_charset)
688  {
689  char *conv = mutt_strn_dup(s, holelen);
690  mutt_ch_convert_nonmime_string(&conv);
691  mutt_buffer_addstr(&buf, conv);
692  FREE(&conv);
693  }
694  else
695  {
696  mutt_buffer_addstr_n(&buf, s, holelen);
697  }
698  }
699  s += holelen;
700  }
701  if (beg)
702  {
703  /* Some encoded text was found */
704  text[textlen] = '\0';
705  char *decoded = decode_word(text, textlen, enc);
706  if (!decoded)
707  {
708  return;
709  }
710  if (prev.data && ((prev_charsetlen != charsetlen) ||
711  !mutt_strn_equal(prev_charset, charset, charsetlen)))
712  {
713  /* Different charset, convert the previous chunk and add it to the
714  * final result */
715  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
716  }
717 
718  mutt_buffer_addstr(&prev, decoded);
719  FREE(&decoded);
720  prev_charset = charset;
721  prev_charsetlen = charsetlen;
722  s = text + textlen + 2; /* Skip final ?= */
723  }
724  }
725 
726  /* Save the last chunk */
727  if (prev.data)
728  {
729  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
730  }
731 
732  mutt_buffer_addch(&buf, '\0');
733  FREE(pd);
734  *pd = buf.data;
735 }
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:358
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:140
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:806
Container for Accounts, Notifications.
Definition: neomutt.h:36
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:548
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:99
char * data
Pointer to data.
Definition: buffer.h:35
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:308
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:334
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:664
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:593
#define FREE(x)
Definition: memory.h:40
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:252
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al: AddressList
tag: Header tag (used for wrapping calculation)

Definition at line 742 of file rfc2047.c.

743 {
744  if (!al)
745  return;
746 
747  int col = tag ? strlen(tag) + 2 : 32;
748  struct Address *a = NULL;
749  TAILQ_FOREACH(a, al, entries)
750  {
751  const char *const c_send_charset =
752  cs_subset_string(NeoMutt->sub, "send_charset");
753  if (a->personal)
754  rfc2047_encode(&a->personal, AddressSpecials, col, c_send_charset);
755  else if (a->group && a->mailbox)
756  rfc2047_encode(&a->mailbox, AddressSpecials, col, c_send_charset);
757  }
758 }
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
An email address.
Definition: address.h:35
char * mailbox
Mailbox and host address.
Definition: address.h:38
Container for Accounts, Notifications.
Definition: neomutt.h:36
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:616
char * personal
Real name of address.
Definition: address.h:37
bool group
Group mailbox?
Definition: address.h:39
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al: AddressList

Definition at line 764 of file rfc2047.c.

765 {
766  if (!al)
767  return;
768 
769  struct Address *a = NULL;
770  TAILQ_FOREACH(a, al, entries)
771  {
772  const char *const c_assumed_charset =
773  cs_subset_string(NeoMutt->sub, "assumed_charset");
774  if (a->personal && ((strstr(a->personal, "=?")) || c_assumed_charset))
775  {
777  }
778  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
779  rfc2047_decode(&a->mailbox);
780  }
781 }
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
An email address.
Definition: address.h:35
char * mailbox
Mailbox and host address.
Definition: address.h:38
Container for Accounts, Notifications.
Definition: neomutt.h:36
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:641
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
char * personal
Real name of address.
Definition: address.h:37
bool group
Group mailbox?
Definition: address.h:39
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 787 of file rfc2047.c.

788 {
789  if (!env)
790  return;
799  rfc2047_decode(&env->x_label);
800  rfc2047_decode(&env->subject);
801 }
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:764
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:641
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:56
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 807 of file rfc2047.c.

808 {
809  if (!env)
810  return;
811  rfc2047_encode_addrlist(&env->from, "From");
812  rfc2047_encode_addrlist(&env->to, "To");
813  rfc2047_encode_addrlist(&env->cc, "Cc");
814  rfc2047_encode_addrlist(&env->bcc, "Bcc");
815  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
816  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
817  rfc2047_encode_addrlist(&env->sender, "Sender");
818  const char *const c_send_charset =
819  cs_subset_string(NeoMutt->sub, "send_charset");
820  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
821  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), c_send_charset);
822 }
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:742
Container for Accounts, Notifications.
Definition: neomutt.h:36
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:616
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function: