NeoMutt  2020-06-26-250-g349c94
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "globals.h"
#include "mime.h"
#include "mutt_globals.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 Prototype for an encoding function. More...
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const char *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 46 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 47 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 49 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) &0xc0) == 0x80)

Definition at line 51 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

Prototype for an encoding function.

Parameters
str  Buffer for the result (the encoded word is written here)
buf  Data to encode
buflen  Length of the data to encode
tocode  Character encoding
Return values
num  Bytes written to the result buffer

Definition at line 61 of file rfc2047.c.

Function Documentation

◆ b_encoder()

static size_t b_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Base64 Encode a string - Implements encoder_t.

Definition at line 66 of file rfc2047.c.

67 {
68  char *s0 = str;
69 
70  memcpy(str, "=?", 2);
71  str += 2;
72  memcpy(str, tocode, strlen(tocode));
73  str += strlen(tocode);
74  memcpy(str, "?B?", 3);
75  str += 3;
76 
77  while (buflen)
78  {
79  char encoded[11];
80  size_t ret;
81  size_t in_len = MIN(3, buflen);
82 
83  ret = mutt_b64_encode(buf, in_len, encoded, sizeof(encoded));
84  for (size_t i = 0; i < ret; i++)
85  *str++ = encoded[i];
86 
87  buflen -= in_len;
88  buf += in_len;
89  }
90 
91  memcpy(str, "?=", 2);
92  str += 2;
93  return str - s0;
94 }
#define MIN(a, b)
Definition: memory.h:31
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
Definition: base64.c:88
static const char encoded[]
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ q_encoder()

static size_t q_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Quoted-printable Encode a string - Implements encoder_t.

Definition at line 99 of file rfc2047.c.

100 {
101  static const char hex[] = "0123456789ABCDEF";
102  char *s0 = str;
103 
104  memcpy(str, "=?", 2);
105  str += 2;
106  memcpy(str, tocode, strlen(tocode));
107  str += strlen(tocode);
108  memcpy(str, "?Q?", 3);
109  str += 3;
110  while (buflen--)
111  {
112  unsigned char c = *buf++;
113  if (c == ' ')
114  *str++ = '_';
115  else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
116  {
117  *str++ = '=';
118  *str++ = hex[(c & 0xf0) >> 4];
119  *str++ = hex[c & 0x0f];
120  }
121  else
122  *str++ = c;
123  }
124  memcpy(str, "?=", 2);
125  str += 2;
126  return str - s0;
127 }
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
+ Here is the caller graph for this function:

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str  String to parse
[out] enc  Content encoding found in the first RFC2047 word
[out] charset  Charset found in the first RFC2047 word
[out] charsetlen  Length of the charset string found
[out] text  Start of the first RFC2047 encoded text
[out] textlen  Length of the encoded text found
Return values
ptr  Start of the RFC2047 encoded word
NULL  None was found

Definition at line 140 of file rfc2047.c.

142 {
143  regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
144  if (!match)
145  return NULL;
146 
147  const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
148  const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
149  const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
150  const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
151 
152  /* Charset */
153  *charset = str + mutt_regmatch_start(mcharset);
154  *charsetlen = mutt_regmatch_len(mcharset);
155 
156  /* Encoding: either Q or B */
157  *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
158 
159  *text = str + mutt_regmatch_start(mtext);
160  *textlen = mutt_regmatch_len(mtext);
161  return str + mutt_regmatch_start(mfull);
162 }
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:95
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:94
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:96
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:80
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:97
Base-64 encoded text.
Definition: mime.h:52
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
match a precompiled regex against a string
Definition: prex.c:306
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:60
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:35
Quoted-printable text.
Definition: mime.h:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d  String to convert
dlen  Length of string
fromcode  Original encoding
tocode  New encoding
encoder  Set to the chosen encoding function on success
wlen  Set to the length of the encoded word on success
Return values
0  Success, string converted
>0  Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 183 of file rfc2047.c.

185 {
186  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
187  const char *ib = NULL;
188  char *ob = NULL;
189  size_t ibl, obl;
190  int count, len, len_b, len_q;
191 
192  if (fromcode)
193  {
194  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
195  assert(cd != (iconv_t)(-1));
196  ib = d;
197  ibl = dlen;
198  ob = buf;
199  obl = sizeof(buf) - strlen(tocode);
200  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t)(-1)) ||
201  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t)(-1)))
202  {
203  assert(errno == E2BIG);
204  iconv_close(cd);
205  assert(ib > d);
206  return ((ib - d) == dlen) ? dlen : ib - d + 1;
207  }
208  iconv_close(cd);
209  }
210  else
211  {
212  if (dlen > (sizeof(buf) - strlen(tocode)))
213  return sizeof(buf) - strlen(tocode) + 1;
214  memcpy(buf, d, dlen);
215  ob = buf + dlen;
216  }
217 
218  count = 0;
219  for (char *p = buf; p < ob; p++)
220  {
221  unsigned char c = *p;
222  assert(strchr(MimeSpecials, '?'));
223  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
224  ((c != ' ') && strchr(MimeSpecials, *p)))
225  {
226  count++;
227  }
228  }
229 
230  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
231  len_b = len + (((ob - buf) + 2) / 3) * 4;
232  len_q = len + (ob - buf) + 2 * count;
233 
234  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
235  if (mutt_istr_equal(tocode, "ISO-2022-JP"))
236  len_q = ENCWORD_LEN_MAX + 1;
237 
238  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
239  {
240  *encoder = b_encoder;
241  *wlen = len_b;
242  return 0;
243  }
244  else if (len_q <= ENCWORD_LEN_MAX)
245  {
246  *encoder = q_encoder;
247  *wlen = len_q;
248  return 0;
249  }
250  else
251  return dlen;
252 }
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition: rfc2047.c:99
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:888
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:565
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition: rfc2047.c:66
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str  Buffer for the result (the encoded word is written here)
buf  Data to encode
buflen  Length of the data to encode
fromcode  Original encoding
tocode  New encoding
encoder  Encoding function
Return values
num  Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 266 of file rfc2047.c.

268 {
269  if (!fromcode)
270  {
271  return (*encoder)(str, buf, buflen, tocode);
272  }
273 
274  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
275  assert(cd != (iconv_t)(-1));
276  const char *ib = buf;
277  size_t ibl = buflen;
278  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
279  char *ob = tmp;
280  size_t obl = sizeof(tmp) - strlen(tocode);
281  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
282  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
283  assert(n1 != (size_t)(-1) && n2 != (size_t)(-1));
284  iconv_close(cd);
285  return (*encoder)(str, tmp, ob - tmp, tocode);
286 }
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:565
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d  String to convert
dlen  Length of string
col  Starting column to convert
fromcode  Original encoding
tocode  New encoding
encoder  Set to the chosen encoding function
wlen  Set to the length of the encoded word
Return values
num  Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 304 of file rfc2047.c.

306 {
307  const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
308 
309  size_t n = dlen;
310  while (true)
311  {
312  assert(n > 0);
313  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
314  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
315  break;
316  n = ((nn != 0) ? nn : n) - 1;
317  assert(n > 0);
318  if (utf8)
319  while ((n > 1) && CONTINUATION_BYTE(d[n]))
320  n--;
321  }
322  return n;
323 }
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:183
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:888
int n
Definition: acutest.h:492
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer res,
struct Buffer buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res  Buffer where the resulting string is appended
[in] buf  Buffer with the input string
[in] charset  Charset to use for the conversion
[in] charsetlen  Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 334 of file rfc2047.c.

335 {
336  if (!charset)
337  return;
338  char end = charset[charsetlen];
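339  charset[charsetlen] = '\0';
340  mutt_ch_convert_string(&buf->data, charset, C_Charset, MUTT_ICONV_HOOK_FROM);
341  charset[charsetlen] = end;
342  mutt_mb_filter_unprintable(&buf->data);
343  mutt_buffer_addstr(res, buf->data);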
344  FREE(&buf->data);
345  mutt_buffer_init(buf);
346 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:754
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
char * data
Pointer to data.
Definition: buffer.h:35
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:53
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:424
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:46
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s  String to decode
len  Length of the string
enc  Encoding type
Return values
ptr  Decoded string
NULL  Base64 decoding failed
Note
The caller must free the returned string

Definition at line 357 of file rfc2047.c.

358 {
359  const char *it = s;
360  const char *end = s + len;
361 
362  if (enc == ENC_QUOTED_PRINTABLE)
363  {
364  struct Buffer buf = mutt_buffer_make(0);
365  for (; it < end; it++)
366  {
367  if (*it == '_')
368  {
369  mutt_buffer_addch(&buf, ' ');
370  }
371  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
372  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
373  {
374  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
375  it += 2;
376  }
377  else
378  {
379  mutt_buffer_addch(&buf, *it);
380  }
381  }
382  mutt_buffer_addch(&buf, '\0');
383  return buf.data;
384  }
385  else if (enc == ENC_BASE64)
386  {
387  const int olen = 3 * len / 4 + 1;
388  char *out = mutt_mem_malloc(olen);
389  int dlen = mutt_b64_decode(it, out, olen);
390  if (dlen == -1)
391  {
392  FREE(&out);
393  return NULL;
394  }
395  out[dlen] = '\0';
396  return out;
397  }
398 
399  assert(0); /* The enc parameter has an invalid value */
400  return NULL;
401 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
Base-64 encoded text.
Definition: mime.h:52
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
char * data
Pointer to data.
Definition: buffer.h:35
#define hexval(ch)
Definition: mime.h:80
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
Quoted-printable text.
Definition: mime.h:51
#define FREE(x)
Definition: memory.h:40
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in] d  String to convert
[in] dlen  Length of string
[in] col  Starting column to convert
[in] fromcode  Original encoding
[in] charsets  List of allowable encodings (colon separated)
[out] e  Encoded string
[out] elen  Length of encoded string
[in] specials  Special characters to be encoded
Return values
0  Success
1  The input could not be converted from fromcode to UTF-8
2  No target charset could be chosen from charsets

Definition at line 415 of file rfc2047.c.

417 {
418  int rc = 0;
419  char *buf = NULL;
420  size_t bufpos, buflen;
421  char *t0 = NULL, *t1 = NULL, *t = NULL;
422  char *s0 = NULL, *s1 = NULL;
423  size_t ulen, r, wlen = 0;
424  encoder_t encoder = NULL;
425  char *tocode1 = NULL;
426  const char *tocode = NULL;
427  const char *icode = "utf-8";
428 
429  /* Try to convert to UTF-8. */
430  char *u = mutt_strn_dup(d, dlen);
431  if (mutt_ch_convert_string(&u, fromcode, icode, 0) != 0)
432  {
433  rc = 1;
434  icode = 0;
435  }
436  ulen = mutt_str_len(u);
437 
438  /* Find earliest and latest things we must encode. */
439  s0 = 0;
440  s1 = 0;
441  t0 = 0;
442  t1 = 0;
443  for (t = u; t < (u + ulen); t++)
444  {
445  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
446  {
447  if (!t0)
448  t0 = t;
449  t1 = t;
450  }
451  else if (specials && *t && strchr(specials, *t))
452  {
453  if (!s0)
454  s0 = t;
455  s1 = t;
456  }
457  }
458 
459  /* If we have something to encode, include RFC822 specials */
460  if (t0 && s0 && (s0 < t0))
461  t0 = s0;
462  if (t1 && s1 && (s1 > t1))
463  t1 = s1;
464 
465  if (!t0)
466  {
467  /* No encoding is required. */
468  *e = u;
469  *elen = ulen;
470  return rc;
471  }
472 
473  /* Choose target charset. */
474  tocode = fromcode;
475  if (icode)
476  {
477  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
478  if (tocode1)
479  tocode = tocode1;
480  else
481  {
482  rc = 2;
483  icode = 0;
484  }
485  }
486 
487  /* Hack to avoid labelling 8-bit data as us-ascii. */
488  if (!icode && mutt_ch_is_us_ascii(tocode))
489  tocode = "unknown-8bit";
490 
491  /* Adjust t0 for maximum length of line. */
492  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
493  if (t < u)
494  t = u;
495  if (t < t0)
496  t0 = t;
497 
498  /* Adjust t0 until we can encode a character after a space. */
499  for (; t0 > u; t0--)
500  {
501  if (!HSPACE(*(t0 - 1)))
502  continue;
503  t = t0 + 1;
504  if (icode)
505  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
506  t++;
507  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
508  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
509  {
510  break;
511  }
512  }
513 
514  /* Adjust t1 until we can encode a character before a space. */
515  for (; t1 < (u + ulen); t1++)
516  {
517  if (!HSPACE(*t1))
518  continue;
519  t = t1 - 1;
520  if (icode)
521  while (CONTINUATION_BYTE(*t))
522  t--;
523  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
524  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
525  {
526  break;
527  }
528  }
529 
530  /* We shall encode the region [t0,t1). */
531 
532  /* Initialise the output buffer with the us-ascii prefix. */
533  buflen = 2 * ulen;
534  buf = mutt_mem_malloc(buflen);
535  bufpos = t0 - u;
536  memcpy(buf, u, t0 - u);
537 
538  col += t0 - u;
539 
540  t = t0;
541  while (true)
542  {
543  /* Find how much we can encode. */
544  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
545  if (n == (t1 - t))
546  {
547  /* See if we can fit the us-ascii suffix, too. */
548  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
549  break;
550  n = t1 - t - 1;
551  if (icode)
552  while (CONTINUATION_BYTE(t[n]))
553  n--;
554  if (n == 0)
555  {
556  /* This should only happen in the really stupid case where the
557  * only word that needs encoding is one character long, but
558  * there is too much us-ascii stuff after it to use a single
559  * encoded word. We add the next word to the encoded region
560  * and try again. */
561  assert(t1 < (u + ulen));
562  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
563  ; // do nothing
564 
565  continue;
566  }
567  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
568  }
569 
570  /* Add to output buffer. */
571  const char *line_break = "\n\t";
572  const int lb_len = 2; /* strlen(line_break) */
573 
574  if ((bufpos + wlen + lb_len) > buflen)
575  {
576  buflen = bufpos + wlen + lb_len;
577  mutt_mem_realloc(&buf, buflen);
578  }
579  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
580  assert(r == wlen);
581  bufpos += wlen;
582  memcpy(buf + bufpos, line_break, lb_len);
583  bufpos += lb_len;
584 
585  col = 1;
586 
587  t += n;
588  }
589 
590  /* Add last encoded word and us-ascii suffix to buffer. */
591  buflen = bufpos + wlen + (u + ulen - t1);
592  mutt_mem_realloc(&buf, buflen + 1);
593  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
594  assert(r == wlen);
595  bufpos += wlen;
596  memcpy(buf + bufpos, t1, u + ulen - t1);
597 
598  FREE(&tocode1);
599  FREE(&u);
600 
601  buf[buflen] = '\0';
602 
603  *e = buf;
604  *elen = buflen + 1;
605  return rc;
606 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:754
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1036
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:51
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Prototype for an encoding function.
Definition: rfc2047.c:61
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:183
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:553
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:96
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
#define HSPACE(ch)
Definition: rfc2047.c:49
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:266
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:636
int n
Definition: acutest.h:492
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:46
#define FREE(x)
Definition: memory.h:40
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:47
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:304
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const char *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out] pd  String to be encoded, and resulting encoded string
[in] specials  Special characters to be encoded
[in] col  Starting index in string
[in] charsets  List of charsets to choose from

Definition at line 615 of file rfc2047.c.

616 {
617  if (!C_Charset || !pd || !*pd)
618  return;
619 
620  if (!charsets)
621  charsets = "utf-8";
622 
623  char *e = NULL;
624  size_t elen = 0;
625  encode(*pd, strlen(*pd), col, C_Charset, charsets, &e, &elen, specials);
626 
627  FREE(pd);
628  *pd = e;
629 }
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:415
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:53
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd  String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 639 of file rfc2047.c.

640 {
641  if (!pd || !*pd)
642  return;
643 
644  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
645  char *s = *pd; /* Read pointer */
646  char *beg = NULL; /* Begin of encoded word */
647  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
648  char *charset = NULL; /* Which charset */
649  size_t charsetlen; /* Length of the charset */
650  char *text = NULL; /* Encoded text */
651  size_t textlen; /* Length of encoded text */
652 
653  /* Keep some state in case the next decoded word is using the same charset
654  * and it happens to be split in the middle of a multibyte character.
655  * See https://github.com/neomutt/neomutt/issues/1015 */
656  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
657  char *prev_charset = NULL; /* Previously used charset */
658  size_t prev_charsetlen = 0; /* Length of the previously used charset */
659 
660  while (*s)
661  {
662  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
663  if (beg != s)
664  {
665  /* Some non-encoded text was found */
666  size_t holelen = beg ? beg - s : mutt_str_len(s);
667 
668  /* Ignore whitespace between encoded words */
669  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
670  {
671  s = beg;
672  continue;
673  }
674 
675  /* If we have some previously decoded text, add it now */
676  if (!mutt_buffer_is_empty(&prev))
677  {
678  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
679  }
680 
681  /* Add non-encoded part */
682  {
683  if (C_AssumedCharset)
684  {
685  char *conv = mutt_strn_dup(s, holelen);
686  mutt_ch_convert_nonmime_string(&conv);
687  mutt_buffer_addstr(&buf, conv);
688  FREE(&conv);
689  }
690  else
691  {
692  mutt_buffer_addstr_n(&buf, s, holelen);
693  }
694  }
695  s += holelen;
696  }
697  if (beg)
698  {
699  /* Some encoded text was found */
700  text[textlen] = '\0';
701  char *decoded = decode_word(text, textlen, enc);
702  if (!decoded)
703  {
704  return;
705  }
706  if (prev.data && ((prev_charsetlen != charsetlen) ||
707  !mutt_strn_equal(prev_charset, charset, charsetlen)))
708  {
709  /* Different charset, convert the previous chunk and add it to the
710  * final result */
711  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
712  }
713 
714  mutt_buffer_addstr(&prev, decoded);
715  FREE(&decoded);
716  prev_charset = charset;
717  prev_charsetlen = charsetlen;
718  s = text + textlen + 2; /* Skip final ?= */
719  }
720  }
721 
722  /* Save the last chunk */
723  if (prev.data)
724  {
725  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
726  }
727 
728  mutt_buffer_addch(&buf, '\0');
729  FREE(pd);
730  *pd = buf.data;
731 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:52
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:357
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:140
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:778
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:553
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:99
char * data
Pointer to data.
Definition: buffer.h:35
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:334
bool mutt_strn_equal(const char *a, const char *b, size_t l)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:598
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:636
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:309
#define FREE(x)
Definition: memory.h:40
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:252
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al - AddressList
tag - Header tag (used for wrapping calculation)

Definition at line 738 of file rfc2047.c.

739 {
740  if (!al)
741  return;
742 
743  int col = tag ? strlen(tag) + 2 : 32;
744  struct Address *a = NULL;
745  TAILQ_FOREACH(a, al, entries)
746  {
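747  if (a->personal)
748  rfc2047_encode(&a->personal, AddressSpecials, col, C_SendCharset);
749  else if (a->group && a->mailbox)
750  rfc2047_encode(&a->mailbox, AddressSpecials, col, C_SendCharset);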
751  }
752 }
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:718
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
struct Address
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:615
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: globals.c:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al - AddressList

Definition at line 758 of file rfc2047.c.

759 {
760  if (!al)
761  return;
762 
763  struct Address *a = NULL;
764  TAILQ_FOREACH(a, al, entries)
765  {
766  if (a->personal && ((strstr(a->personal, "=?")) || C_AssumedCharset))
767  {
768  rfc2047_decode(&a->personal);
769  }
770  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
771  rfc2047_decode(&a->mailbox);
772  }
773 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:52
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:718
struct Address
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:639
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env - Envelope

Definition at line 779 of file rfc2047.c.

780 {
781  if (!env)
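782  return;
783  rfc2047_decode_addrlist(&env->from);
784  rfc2047_decode_addrlist(&env->to);
785  rfc2047_decode_addrlist(&env->cc);
786  rfc2047_decode_addrlist(&env->bcc);
787  rfc2047_decode_addrlist(&env->reply_to);
788  rfc2047_decode_addrlist(&env->mail_followup_to);
789  rfc2047_decode_addrlist(&env->return_path);
790  rfc2047_decode_addrlist(&env->sender);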
791  rfc2047_decode(&env->x_label);
792  rfc2047_decode(&env->subject);
793 }
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:758
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:639
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:56
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env - Envelope

Definition at line 799 of file rfc2047.c.

800 {
801  if (!env)
802  return;
803  rfc2047_encode_addrlist(&env->from, "From");
804  rfc2047_encode_addrlist(&env->to, "To");
805  rfc2047_encode_addrlist(&env->cc, "Cc");
806  rfc2047_encode_addrlist(&env->bcc, "Bcc");
807  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
808  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
809  rfc2047_encode_addrlist(&env->sender, "Sender");
810  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), C_SendCharset);
811  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), C_SendCharset);
812 }
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:738
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:615
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: globals.c:38
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function: