NeoMutt  2019-12-07-60-g0cfa53
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <regex.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/mutt.h"
#include "address/lib.h"
#include "rfc2047.h"
#include "email_globals.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 typedef encoder_t - Prototype for an encoding function More...
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const char *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 45 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 46 of file rfc2047.c.

◆ HSPACE

#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 48 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)

Definition at line 50 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

typedef encoder_t - Prototype for an encoding function

Parameters
str: Buffer that receives the encoded word
buf: Data to encode
buflen: Length of the data in buf
tocode: Character encoding
Return values
num: Bytes written to str

Definition at line 60 of file rfc2047.c.

Function Documentation

◆ b_encoder()

static size_t b_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Base64 Encode a string - Implements encoder_t.

Definition at line 65 of file rfc2047.c.

66 {
67  char *s0 = str;
68 
69  memcpy(str, "=?", 2);
70  str += 2;
71  memcpy(str, tocode, strlen(tocode));
72  str += strlen(tocode);
73  memcpy(str, "?B?", 3);
74  str += 3;
75 
76  while (buflen)
77  {
78  char encoded[11];
79  size_t ret;
80  size_t in_len = MIN(3, buflen);
81 
82  ret = mutt_b64_encode(buf, in_len, encoded, sizeof(encoded));
83  for (size_t i = 0; i < ret; i++)
84  *str++ = encoded[i];
85 
86  buflen -= in_len;
87  buf += in_len;
88  }
89 
90  memcpy(str, "?=", 2);
91  str += 2;
92  return str - s0;
93 }
#define MIN(a, b)
Definition: memory.h:31
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
Definition: base64.c:88
static const char encoded[]
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ q_encoder()

static size_t q_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Quoted-printable Encode a string - Implements encoder_t.

Definition at line 98 of file rfc2047.c.

99 {
100  static const char hex[] = "0123456789ABCDEF";
101  char *s0 = str;
102 
103  memcpy(str, "=?", 2);
104  str += 2;
105  memcpy(str, tocode, strlen(tocode));
106  str += strlen(tocode);
107  memcpy(str, "?Q?", 3);
108  str += 3;
109  while (buflen--)
110  {
111  unsigned char c = *buf++;
112  if (c == ' ')
113  *str++ = '_';
114  else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
115  {
116  *str++ = '=';
117  *str++ = hex[(c & 0xf0) >> 4];
118  *str++ = hex[c & 0x0f];
119  }
120  else
121  *str++ = c;
122  }
123  memcpy(str, "?=", 2);
124  str += 2;
125  return str - s0;
126 }
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
+ Here is the caller graph for this function:

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str: String to parse
[out] enc: Content encoding found in the first RFC2047 word
[out] charset: Charset found in the first RFC2047 word
[out] charsetlen: Length of the charset string found
[out] text: Start of the first RFC2047 encoded text
[out] textlen: Length of the encoded text found
Return values
ptr: Start of the RFC2047 encoded word
NULL: None was found

Definition at line 139 of file rfc2047.c.

141 {
142  regmatch_t match[4];
143  size_t nmatch = 4;
144  struct Regex *re = mutt_regex_compile("=\\?"
145  "([^][()<>@,;:\\\"/?. =]+)" /* charset */
146  "\\?"
147  "([qQbB])" /* encoding */
148  "\\?"
149  "([^?]+)" /* encoded text - we accept whitespace
150  as some mailers do that, see #1189. */
151  "\\?=",
152  REG_EXTENDED);
153  assert(re && "Something is wrong with your RE engine.");
154 
155  char *res = NULL;
156 
157  if (mutt_regex_capture(re, str, nmatch, match))
158  {
159  /* Charset */
160  *charset = str + match[1].rm_so;
161  *charsetlen = match[1].rm_eo - match[1].rm_so;
162 
163  /* Encoding: either Q or B */
164  *enc = ((str[match[2].rm_so] == 'Q') || (str[match[2].rm_so] == 'q')) ?
165  ENC_QUOTED_PRINTABLE :
166  ENC_BASE64;
167 
168  *text = str + match[3].rm_so;
169  *textlen = match[3].rm_eo - match[3].rm_so;
170  res = str + match[0].rm_so;
171  }
172 
173  mutt_regex_free(&re);
174  return res;
175 }
struct Regex * mutt_regex_compile(const char *str, int flags)
Create an Regex from a string.
Definition: regex.c:53
Base-64 encoded text.
Definition: mime.h:52
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
match a regex against a string, with provided options
Definition: regex.c:594
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:111
Cached regular expression.
Definition: regex3.h:57
Quoted-printable text.
Definition: mime.h:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
d: String to convert
dlen: Length of string
fromcode: Original encoding
tocode: New encoding
encoder: Set to the chosen encoding function on success
wlen: Set to the length of the encoded word on success
Return values
0: Success, string converted
>0: Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 196 of file rfc2047.c.

198 {
199  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
200  const char *ib = NULL;
201  char *ob = NULL;
202  size_t ibl, obl;
203  int count, len, len_b, len_q;
204 
205  if (fromcode)
206  {
207  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
208  assert(cd != (iconv_t)(-1));
209  ib = d;
210  ibl = dlen;
211  ob = buf;
212  obl = sizeof(buf) - strlen(tocode);
213  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t)(-1)) ||
214  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t)(-1)))
215  {
216  assert(errno == E2BIG);
217  iconv_close(cd);
218  assert(ib > d);
219  return ((ib - d) == dlen) ? dlen : ib - d + 1;
220  }
221  iconv_close(cd);
222  }
223  else
224  {
225  if (dlen > (sizeof(buf) - strlen(tocode)))
226  return sizeof(buf) - strlen(tocode) + 1;
227  memcpy(buf, d, dlen);
228  ob = buf + dlen;
229  }
230 
231  count = 0;
232  for (char *p = buf; p < ob; p++)
233  {
234  unsigned char c = *p;
235  assert(strchr(MimeSpecials, '?'));
236  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
237  ((c != ' ') && strchr(MimeSpecials, *p)))
238  {
239  count++;
240  }
241  }
242 
243  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
244  len_b = len + (((ob - buf) + 2) / 3) * 4;
245  len_q = len + (ob - buf) + 2 * count;
246 
247  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
248  if (mutt_str_strcasecmp(tocode, "ISO-2022-JP") == 0)
249  len_q = ENCWORD_LEN_MAX + 1;
250 
251  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
252  {
253  *encoder = b_encoder;
254  *wlen = len_b;
255  return 0;
256  }
257  else if (len_q <= ENCWORD_LEN_MAX)
258  {
259  *encoder = q_encoder;
260  *wlen = len_q;
261  return 0;
262  }
263  else
264  return dlen;
265 }
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition: rfc2047.c:98
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:559
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition: rfc2047.c:65
int mutt_str_strcasecmp(const char *a, const char *b)
Compare two strings ignoring case, safely.
Definition: string.c:628
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str: Buffer that receives the encoded word
buf: Data to encode
buflen: Length of the data in buf
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function
Return values
num: Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 279 of file rfc2047.c.

281 {
282  if (!fromcode)
283  {
284  return (*encoder)(str, buf, buflen, tocode);
285  }
286 
287  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
288  assert(cd != (iconv_t)(-1));
289  const char *ib = buf;
290  size_t ibl = buflen;
291  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
292  char *ob = tmp;
293  size_t obl = sizeof(tmp) - strlen(tocode);
294  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
295  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
296  assert(n1 != (size_t)(-1) && n2 != (size_t)(-1));
297  iconv_close(cd);
298  return (*encoder)(str, tmp, ob - tmp, tocode);
299 }
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:559
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d: String to convert
dlen: Length of string
col: Starting column to convert
fromcode: Original encoding
tocode: New encoding
encoder: Set to the chosen encoding function
wlen: Set to the length of the encoded word
Return values
num: Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 317 of file rfc2047.c.

319 {
320  const bool utf8 = fromcode && (mutt_str_strcasecmp(fromcode, "utf-8") == 0);
321 
322  size_t n = dlen;
323  while (true)
324  {
325  assert(n > 0);
326  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
327  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
328  break;
329  n = ((nn != 0) ? nn : n) - 1;
330  assert(n > 0);
331  if (utf8)
332  while ((n > 1) && CONTINUATION_BYTE(d[n]))
333  n--;
334  }
335  return n;
336 }
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:50
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:196
int mutt_str_strcasecmp(const char *a, const char *b)
Compare two strings ignoring case, safely.
Definition: string.c:628
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res: Buffer where the resulting string is appended
[in] buf: Buffer with the input string
[in] charset: Charset to use for the conversion
[in] charsetlen: Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 347 of file rfc2047.c.

348 {
349  if (!charset)
350  return;
351  char end = charset[charsetlen];
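352  charset[charsetlen] = '\0';
353  mutt_ch_convert_string(&buf->data, charset, C_Charset, MUTT_ICONV_HOOK_FROM);
354  charset[charsetlen] = end;
355  mutt_mb_filter_unprintable(&buf->data);
356  mutt_buffer_addstr(res, buf->data);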
357  FREE(&buf->data);
358  mutt_buffer_init(buf);
359 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:748
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
char * data
Pointer to data.
Definition: buffer.h:35
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:54
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:424
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:46
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:81
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s: String to decode
len: Length of the string
enc: Encoding type
Return values
ptr: Decoded string
Note
The caller must free the returned string

Definition at line 370 of file rfc2047.c.

371 {
372  const char *it = s;
373  const char *end = s + len;
374 
375  if (enc == ENC_QUOTED_PRINTABLE)
376  {
377  struct Buffer buf = mutt_buffer_make(0);
378  for (; it < end; it++)
379  {
380  if (*it == '_')
381  {
382  mutt_buffer_addch(&buf, ' ');
383  }
384  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
385  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
386  {
387  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2]));
388  it += 2;
389  }
390  else
391  {
392  mutt_buffer_addch(&buf, *it);
393  }
394  }
395  mutt_buffer_addch(&buf, '\0');
396  return buf.data;
397  }
398  else if (enc == ENC_BASE64)
399  {
400  const int olen = 3 * len / 4 + 1;
401  char *out = mutt_mem_malloc(olen);
402  int dlen = mutt_b64_decode(it, out, olen);
403  if (dlen == -1)
404  {
405  FREE(&out);
406  return NULL;
407  }
408  out[dlen] = '\0';
409  return out;
410  }
411 
412  assert(0); /* The enc parameter has an invalid value */
413  return NULL;
414 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
Base-64 encoded text.
Definition: mime.h:52
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
char * data
Pointer to data.
Definition: buffer.h:35
#define hexval(ch)
Definition: mime.h:75
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
Quoted-printable text.
Definition: mime.h:51
#define FREE(x)
Definition: memory.h:40
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] charsets: List of allowable encodings (colon separated)
[out] e: Encoded string
[out] elen: Length of encoded string
[in] specials: Special characters to be encoded
Return values
0: Success

Definition at line 428 of file rfc2047.c.

430 {
431  int rc = 0;
432  char *buf = NULL;
433  size_t bufpos, buflen;
434  char *t0 = NULL, *t1 = NULL, *t = NULL;
435  char *s0 = NULL, *s1 = NULL;
436  size_t ulen, r, wlen = 0;
437  encoder_t encoder = NULL;
438  char *tocode1 = NULL;
439  const char *tocode = NULL;
440  const char *icode = "utf-8";
441 
442  /* Try to convert to UTF-8. */
443  char *u = mutt_str_substr_dup(d, d + dlen);
444  if (mutt_ch_convert_string(&u, fromcode, icode, 0) != 0)
445  {
446  rc = 1;
447  icode = 0;
448  }
449  ulen = mutt_str_strlen(u);
450 
451  /* Find earliest and latest things we must encode. */
452  s0 = 0;
453  s1 = 0;
454  t0 = 0;
455  t1 = 0;
456  for (t = u; t < (u + ulen); t++)
457  {
458  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
459  {
460  if (!t0)
461  t0 = t;
462  t1 = t;
463  }
464  else if (specials && *t && strchr(specials, *t))
465  {
466  if (!s0)
467  s0 = t;
468  s1 = t;
469  }
470  }
471 
472  /* If we have something to encode, include RFC822 specials */
473  if (t0 && s0 && (s0 < t0))
474  t0 = s0;
475  if (t1 && s1 && (s1 > t1))
476  t1 = s1;
477 
478  if (!t0)
479  {
480  /* No encoding is required. */
481  *e = u;
482  *elen = ulen;
483  return rc;
484  }
485 
486  /* Choose target charset. */
487  tocode = fromcode;
488  if (icode)
489  {
490  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
491  if (tocode1)
492  tocode = tocode1;
493  else
494  {
495  rc = 2;
496  icode = 0;
497  }
498  }
499 
500  /* Hack to avoid labelling 8-bit data as us-ascii. */
501  if (!icode && mutt_ch_is_us_ascii(tocode))
502  tocode = "unknown-8bit";
503 
504  /* Adjust t0 for maximum length of line. */
505  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
506  if (t < u)
507  t = u;
508  if (t < t0)
509  t0 = t;
510 
511  /* Adjust t0 until we can encode a character after a space. */
512  for (; t0 > u; t0--)
513  {
514  if (!HSPACE(*(t0 - 1)))
515  continue;
516  t = t0 + 1;
517  if (icode)
518  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
519  t++;
520  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
521  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
522  {
523  break;
524  }
525  }
526 
527  /* Adjust t1 until we can encode a character before a space. */
528  for (; t1 < (u + ulen); t1++)
529  {
530  if (!HSPACE(*t1))
531  continue;
532  t = t1 - 1;
533  if (icode)
534  while (CONTINUATION_BYTE(*t))
535  t--;
536  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
537  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
538  {
539  break;
540  }
541  }
542 
543  /* We shall encode the region [t0,t1). */
544 
545  /* Initialise the output buffer with the us-ascii prefix. */
546  buflen = 2 * ulen;
547  buf = mutt_mem_malloc(buflen);
548  bufpos = t0 - u;
549  memcpy(buf, u, t0 - u);
550 
551  col += t0 - u;
552 
553  t = t0;
554  while (true)
555  {
556  /* Find how much we can encode. */
557  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
558  if (n == (t1 - t))
559  {
560  /* See if we can fit the us-ascii suffix, too. */
561  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
562  break;
563  n = t1 - t - 1;
564  if (icode)
565  while (CONTINUATION_BYTE(t[n]))
566  n--;
567  if (n == 0)
568  {
569  /* This should only happen in the really stupid case where the
570  * only word that needs encoding is one character long, but
571  * there is too much us-ascii stuff after it to use a single
572  * encoded word. We add the next word to the encoded region
573  * and try again. */
574  assert(t1 < (u + ulen));
575  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
576  ;
577  continue;
578  }
579  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
580  }
581 
582  /* Add to output buffer. */
583  const char *line_break = "\n\t";
584  const int lb_len = 2; /* strlen(line_break) */
585 
586  if ((bufpos + wlen + lb_len) > buflen)
587  {
588  buflen = bufpos + wlen + lb_len;
589  mutt_mem_realloc(&buf, buflen);
590  }
591  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
592  assert(r == wlen);
593  bufpos += wlen;
594  memcpy(buf + bufpos, line_break, lb_len);
595  bufpos += lb_len;
596 
597  col = 1;
598 
599  t += n;
600  }
601 
602  /* Add last encoded word and us-ascii suffix to buffer. */
603  buflen = bufpos + wlen + (u + ulen - t1);
604  mutt_mem_realloc(&buf, buflen + 1);
605  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
606  assert(r == wlen);
607  bufpos += wlen;
608  memcpy(buf + bufpos, t1, u + ulen - t1);
609 
610  FREE(&tocode1);
611  FREE(&u);
612 
613  buf[buflen] = '\0';
614 
615  *e = buf;
616  *elen = buflen + 1;
617  return rc;
618 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:748
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1030
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:50
size_t mutt_str_strlen(const char *a)
Calculate the length of a string, safely.
Definition: string.c:666
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
typedef encoder_t - Prototype for an encoding function
Definition: rfc2047.c:60
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:196
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:107
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
#define HSPACE(ch)
Definition: rfc2047.c:48
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:279
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define FREE(x)
Definition: memory.h:40
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:317
char * mutt_str_substr_dup(const char *begin, const char *end)
Duplicate a sub-string.
Definition: string.c:579
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const char *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out] pd: String to be encoded, and resulting encoded string
[in] specials: Special characters to be encoded
[in] col: Starting index in string
[in] charsets: List of charsets to choose from

Definition at line 627 of file rfc2047.c.

628 {
629  if (!C_Charset || !pd || !*pd)
630  return;
631 
632  if (!charsets)
633  charsets = "utf-8";
634 
635  char *e = NULL;
636  size_t elen = 0;
637  encode(*pd, strlen(*pd), col, C_Charset, charsets, &e, &elen, specials);
638 
639  FREE(pd);
640  *pd = e;
641 }
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:428
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:54
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd: String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 651 of file rfc2047.c.

652 {
653  if (!pd || !*pd)
654  return;
655 
656  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
657  char *s = *pd; /* Read pointer */
658  char *beg = NULL; /* Begin of encoded word */
659  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
660  char *charset = NULL; /* Which charset */
661  size_t charsetlen; /* Length of the charset */
662  char *text = NULL; /* Encoded text */
663  size_t textlen; /* Length of encoded text */
664 
665  /* Keep some state in case the next decoded word is using the same charset
666  * and it happens to be split in the middle of a multibyte character.
667  * See https://github.com/neomutt/neomutt/issues/1015 */
668  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
669  char *prev_charset = NULL; /* Previously used charset */
670  size_t prev_charsetlen = 0; /* Length of the previously used charset */
671 
672  while (*s)
673  {
674  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
675  if (beg != s)
676  {
677  /* Some non-encoded text was found */
678  size_t holelen = beg ? beg - s : mutt_str_strlen(s);
679 
680  /* Ignore whitespace between encoded words */
681  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
682  {
683  s = beg;
684  continue;
685  }
686 
687  /* If we have some previously decoded text, add it now */
688  if (!mutt_buffer_is_empty(&prev))
689  {
690  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
691  }
692 
693  /* Add non-encoded part */
694  {
695  if (C_AssumedCharset)
696  {
697  char *conv = mutt_str_substr_dup(s, s + holelen);
698  mutt_ch_convert_nonmime_string(&conv);
699  mutt_buffer_addstr(&buf, conv);
700  FREE(&conv);
701  }
702  else
703  {
704  mutt_buffer_addstr_n(&buf, s, holelen);
705  }
706  }
707  s += holelen;
708  }
709  if (beg)
710  {
711  /* Some encoded text was found */
712  text[textlen] = '\0';
713  char *decoded = decode_word(text, textlen, enc);
714  if (!decoded)
715  {
716  return;
717  }
718  if (prev.data && ((prev_charsetlen != charsetlen) ||
719  (mutt_str_strncmp(prev_charset, charset, charsetlen) != 0)))
720  {
721  /* Different charset, convert the previous chunk and add it to the
722  * final result */
723  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
724  }
725 
726  mutt_buffer_addstr(&prev, decoded);
727  FREE(&decoded);
728  prev_charset = charset;
729  prev_charsetlen = charsetlen;
730  s = text + textlen + 2; /* Skip final ?= */
731  }
732  }
733 
734  /* Save the last chunk */
735  if (prev.data)
736  {
737  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
738  }
739 
740  mutt_buffer_addch(&buf, '\0');
741  FREE(pd);
742  *pd = buf.data;
743 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:53
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:370
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:139
size_t mutt_str_strlen(const char *a)
Calculate the length of a string, safely.
Definition: string.c:666
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:815
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:99
char * data
Pointer to data.
Definition: buffer.h:35
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:347
int mutt_str_strncmp(const char *a, const char *b, size_t l)
Compare two strings (to a maximum), safely.
Definition: string.c:642
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:301
#define FREE(x)
Definition: memory.h:40
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:252
char * mutt_str_substr_dup(const char *begin, const char *end)
Duplicate a sub-string.
Definition: string.c:579
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al  AddressList
tag  Header tag (used for wrapping calculation)

Definition at line 750 of file rfc2047.c.

751 {
752  if (!al)
753  return;
754 
755  int col = tag ? strlen(tag) + 2 : 32;
756  struct Address *a = NULL;
757  TAILQ_FOREACH(a, al, entries)
758  {
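759  if (a->personal)
760  rfc2047_encode(&a->personal, AddressSpecials, col, C_SendCharset);
761  else if (a->group && a->mailbox)
762  rfc2047_encode(&a->mailbox, AddressSpecials, col, C_SendCharset);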
763  }
764 }
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:718
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: email_globals.c:38
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:627
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al  AddressList

Definition at line 770 of file rfc2047.c.

771 {
772  if (!al)
773  return;
774 
775  struct Address *a = NULL;
776  TAILQ_FOREACH(a, al, entries)
777  {
778  if (a->personal && ((strstr(a->personal, "=?")) || C_AssumedCharset))
779  {
780  rfc2047_decode(&a->personal);
781  }
782  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
783  rfc2047_decode(&a->mailbox);
784  }
785 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:53
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:718
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:651
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 791 of file rfc2047.c.

792 {
793  if (!env)
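794  return;
795  rfc2047_decode_addrlist(&env->from);
796  rfc2047_decode_addrlist(&env->to);
797  rfc2047_decode_addrlist(&env->cc);
798  rfc2047_decode_addrlist(&env->bcc);
799  rfc2047_decode_addrlist(&env->reply_to);
800  rfc2047_decode_addrlist(&env->mail_followup_to);
801  rfc2047_decode_addrlist(&env->return_path);
802  rfc2047_decode_addrlist(&env->sender);
803  rfc2047_decode(&env->x_label);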
804  rfc2047_decode(&env->subject);
805 }
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:770
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:651
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:56
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 811 of file rfc2047.c.

812 {
813  if (!env)
814  return;
815  rfc2047_encode_addrlist(&env->from, "From");
816  rfc2047_encode_addrlist(&env->to, "To");
817  rfc2047_encode_addrlist(&env->cc, "Cc");
818  rfc2047_encode_addrlist(&env->bcc, "Bcc");
819  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
820  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
821  rfc2047_encode_addrlist(&env->sender, "Sender");
822  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), C_SendCharset);
823  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), C_SendCharset);
824 }
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:750
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: email_globals.c:38
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:627
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function: