NeoMutt  2018-07-16 +2481-68dcde
Teaching an old dog new tricks
DOXYGEN
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <regex.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/mutt.h"
#include "address/lib.h"
#include "rfc2047.h"
#include "email_globals.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) &0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)
 typedef encoder_t - Prototype for an encoding function More...
 

Functions

static size_t b_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Base64 Encode a string - Implements encoder_t. More...
 
static size_t q_encoder (char *str, const char *buf, size_t buflen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t. More...
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements. More...
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text. More...
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder. More...
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted. More...
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering. More...
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string. More...
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string. More...
 
void rfc2047_encode (char **pd, const char *specials, int col, const char *charsets)
 RFC-2047-encode a string. More...
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields. More...
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list. More...
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list. More...
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope. More...
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope. More...
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Michael R. Elkins
  • Edmund Grimley Evans
  • Pietro Cerutti

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 45 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 46 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 48 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) &0xc0) == 0x80)

Definition at line 50 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode)

typedef encoder_t - Prototype for an encoding function

Parameters
str – Buffer that receives the encoded word
buf – Data to encode
buflen – Length of the data in buf
tocode – Character encoding
Return values
num – Bytes written to str

Definition at line 60 of file rfc2047.c.

Function Documentation

◆ b_encoder()

static size_t b_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Base64 Encode a string - Implements encoder_t.

Definition at line 65 of file rfc2047.c.

66 { /* Writes =?tocode?B? followed by the base64 form of buf and a closing ?= into str; returns the byte count, no NUL is appended */
67  char *s0 = str; /* start of the output, used to compute the returned length */
68 
69  memcpy(str, "=?", 2);
70  str += 2;
71  memcpy(str, tocode, strlen(tocode));
72  str += strlen(tocode);
73  memcpy(str, "?B?", 3);
74  str += 3;
75 
76  while (buflen)
77  {
78  char encoded[11];
79  size_t ret;
80  size_t in_len = MIN(3, buflen); /* consume at most 3 input bytes per pass */
81 
82  ret = mutt_b64_encode(buf, in_len, encoded, sizeof(encoded));
83  for (size_t i = 0; i < ret; i++)
84  *str++ = encoded[i]; /* copy only the ret bytes reported by the encoder */
85 
86  buflen -= in_len;
87  buf += in_len;
88  }
89 
90  memcpy(str, "?=", 2);
91  str += 2;
92  return str - s0; /* no size for str is passed in, so the caller must supply a large enough buffer */
93 }
#define MIN(a, b)
Definition: memory.h:31
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
Definition: base64.c:88
static const char encoded[]
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ q_encoder()

static size_t q_encoder ( char *  str,
const char *  buf,
size_t  buflen,
const char *  tocode 
)
static

Quoted-printable Encode a string - Implements encoder_t.

Definition at line 98 of file rfc2047.c.

99 { /* Writes =?tocode?Q? followed by the Q-encoded form of buf and a closing ?= into str; returns the byte count, no NUL is appended */
100  static const char hex[] = "0123456789ABCDEF";
101  char *s0 = str; /* start of the output, used to compute the returned length */
102 
103  memcpy(str, "=?", 2);
104  str += 2;
105  memcpy(str, tocode, strlen(tocode));
106  str += strlen(tocode);
107  memcpy(str, "?Q?", 3);
108  str += 3;
109  while (buflen--)
110  {
111  unsigned char c = *buf++;
112  if (c == ' ')
113  *str++ = '_'; /* a space is written as an underscore */
114  else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
115  { /* bytes at or above 0x7f, below 0x20, the underscore itself and MimeSpecials are escaped as = plus two upper-case hex digits */
116  *str++ = '=';
117  *str++ = hex[(c & 0xf0) >> 4];
118  *str++ = hex[c & 0x0f];
119  }
120  else
121  *str++ = c; /* everything else is copied through unchanged */
122  }
123  memcpy(str, "?=", 2);
124  str += 2;
125  return str - s0;
126 }
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
+ Here is the caller graph for this function:

◆ parse_encoded_word()

static char* parse_encoded_word ( char *  str,
enum ContentEncoding enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str – String to parse
[out] enc – Content encoding found in the first RFC2047 word
[out] charset – Charset found in the first RFC2047 word
[out] charsetlen – Length of the charset string found
[out] text – Start of the first RFC2047 encoded text
[out] textlen – Length of the encoded text found
Return values
ptr – Start of the RFC2047 encoded word
NULL – None was found

Definition at line 139 of file rfc2047.c.

141 { /* Locate the first encoded word in str and report its charset, encoding and text as pointers into str plus lengths */
142  regmatch_t match[4]; /* whole match plus the three capture groups */
143  size_t nmatch = 4;
144  struct Regex *re = mutt_regex_compile("=\\?"
145  "([^][()<>@,;:\\\"/?. =]+)" /* charset */
146  "\\?"
147  "([qQbB])" /* encoding */
148  "\\?"
149  "([^?]+)" /* encoded text - we accept whitespace
150  as some mailers do that, see #1189. */
151  "\\?=",
152  REG_EXTENDED); /* NOTE(review): the pattern is compiled here and freed below on every call */
153  assert(re && "Something is wrong with your RE engine.");
154 
155  char *res = NULL; /* stays NULL when no encoded word is found */
156 
157  if (mutt_regex_capture(re, str, nmatch, match))
158  {
159  /* Charset */
160  *charset = str + match[1].rm_so;
161  *charsetlen = match[1].rm_eo - match[1].rm_so;
162 
163  /* Encoding: either Q or B */
164  *enc = ((str[match[2].rm_so] == 'Q') || (str[match[2].rm_so] == 'q')) ?
166  ENC_BASE64; /* NOTE(review): listing line 165 is absent from this extract, so the first arm of the conditional is not visible here */
167 
168  *text = str + match[3].rm_so;
169  *textlen = match[3].rm_eo - match[3].rm_so;
170  res = str + match[0].rm_so; /* start of the whole encoded word, including the leading delimiter */
171  }
172 
173  mutt_regex_free(&re);
174  return res;
175 }
struct Regex * mutt_regex_compile(const char *str, int flags)
Create an Regex from a string.
Definition: regex.c:53
Base-64 encoded text.
Definition: mime.h:52
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
match a regex against a string, with provided options
Definition: regex.c:594
void mutt_regex_free(struct Regex **r)
Free a Regex object.
Definition: regex.c:111
Cached regular expression.
Definition: regex3.h:57
Quoted-printable text.
Definition: mime.h:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

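Parameters
d – String to convert
dlen – Length of string
fromcode – Original encoding
tocode – New encoding
encoder – Set to the chosen encoding function on success
wlen – Set to the length of the encoded word on success
Return values
0 – Success, the data fits in one encoded word; *encoder and *wlen are set
>0 – Failure, an upper bound on the number of bytes that could be converted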

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 196 of file rfc2047.c.

198 { /* Trial-convert d into a fixed scratch buffer, then decide whether the result fits in a single encoded word using B or Q encoding */
199  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
200  const char *ib = NULL;
201  char *ob = NULL;
202  size_t ibl, obl;
203  int count, len, len_b, len_q;
204 
205  if (fromcode)
206  {
207  iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
208  assert(cd != (iconv_t)(-1));
209  ib = d;
210  ibl = dlen;
211  ob = buf;
212  obl = sizeof(buf) - strlen(tocode); /* leave room for the charset name; NOTE(review): unsigned subtraction, wraps if strlen(tocode) exceeds sizeof(buf) */
213  if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == (size_t)(-1)) ||
214  (iconv(cd, NULL, NULL, &ob, &obl) == (size_t)(-1)))
215  {
216  assert(errno == E2BIG); /* only running out of output space is expected; NOTE(review): any other iconv error trips this assert */
217  iconv_close(cd);
218  assert(ib > d);
219  return ((ib - d) == dlen) ? dlen : ib - d + 1; /* report how far the input was consumed as the upper bound */
220  }
221  iconv_close(cd);
222  }
223  else
224  {
225  if (dlen > (sizeof(buf) - strlen(tocode)))
226  return sizeof(buf) - strlen(tocode) + 1; /* too long for the scratch buffer: return one more than what would fit */
227  memcpy(buf, d, dlen); /* no conversion requested: use the bytes as they are */
228  ob = buf + dlen;
229  }
230 
231  count = 0; /* number of bytes that Q encoding would have to escape */
232  for (char *p = buf; p < ob; p++)
233  {
234  unsigned char c = *p;
235  assert(strchr(MimeSpecials, '?'));
236  if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
237  ((c != ' ') && strchr(MimeSpecials, *p)))
238  {
239  count++;
240  }
241  }
242 
243  len = ENCWORD_LEN_MIN - 2 + strlen(tocode); /* fixed overhead of an encoded word for this charset name */
244  len_b = len + (((ob - buf) + 2) / 3) * 4; /* B length: four output characters per three converted bytes, rounded up */
245  len_q = len + (ob - buf) + 2 * count; /* Q length: one character per byte plus two extra for each escaped byte */
246 
247  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
248  if (mutt_str_strcasecmp(tocode, "ISO-2022-JP") == 0)
249  len_q = ENCWORD_LEN_MAX + 1; /* makes the Q branch below unreachable for this charset */
250 
251  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
252  {
253  *encoder = b_encoder;
254  *wlen = len_b;
255  return 0;
256  }
257  else if (len_q <= ENCWORD_LEN_MAX)
258  {
259  *encoder = q_encoder;
260  *wlen = len_q;
261  return 0;
262  }
263  else
264  return dlen; /* converted fine but neither encoding fits: dlen itself is the upper bound */
265 }
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition: rfc2047.c:98
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:559
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition: rfc2047.c:65
int mutt_str_strcasecmp(const char *a, const char *b)
Compare two strings ignoring case, safely.
Definition: string.c:628
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
str – Buffer that receives the encoded word
buf – Data to encode
buflen – Length of the data in buf
fromcode – Original encoding
tocode – New encoding
encoder – Encoding function
Return values
num – Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 279 of file rfc2047.c.

281 { /* Convert buf from fromcode to tocode when fromcode is set, then let encoder write the encoded word into str */
282  if (!fromcode)
283  {
284  return (*encoder)(str, buf, buflen, tocode); /* no conversion requested: encode the bytes as they are */
285  }
286 
287  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, 0);
288  assert(cd != (iconv_t)(-1));
289  const char *ib = buf;
290  size_t ibl = buflen;
291  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; /* same scratch size as the buffer used by try_block */
292  char *ob = tmp;
293  size_t obl = sizeof(tmp) - strlen(tocode);
294  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
295  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
296  assert(n1 != (size_t)(-1) && n2 != (size_t)(-1)); /* NOTE(review): presumably safe because callers size the block with try_block first - confirm */
297  iconv_close(cd);
298  return (*encoder)(str, tmp, ob - tmp, tocode); /* encode only the bytes that iconv produced */
299 }
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, int flags)
Set up iconv for conversions.
Definition: charset.c:559
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
d – String to convert
dlen – Length of string
col – Starting column to convert
fromcode – Original encoding
tocode – New encoding
encoder – Set to the chosen encoding function
wlen – Set to the length of the encoded word
Return values
num – Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 317 of file rfc2047.c.

319 { /* Shrink the candidate length n, starting at dlen, until try_block accepts it and the encoded word fits on the line */
320  const bool utf8 = fromcode && (mutt_str_strcasecmp(fromcode, "utf-8") == 0);
321 
322  size_t n = dlen;
323  while (true)
324  {
325  assert(n > 0);
326  const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
327  if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
328  break; /* block encodes, and it either fits from column col or cannot shrink any further */
329  n = ((nn != 0) ? nn : n) - 1; /* retry one byte below the bound from try_block, or below n when only the column check failed */
330  assert(n > 0);
331  if (utf8)
332  while ((n > 1) && CONTINUATION_BYTE(d[n])) /* back up so the cut does not land in the middle of a multibyte sequence */
333  n--;
334  }
335  return n;
336 }
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:50
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:196
int mutt_str_strcasecmp(const char *a, const char *b)
Compare two strings ignoring case, safely.
Definition: string.c:628
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer res,
struct Buffer buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res – Buffer where the resulting string is appended
[in] buf – Buffer with the input string
[in] charset – Charset to use for the conversion
[in] charsetlen – Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 347 of file rfc2047.c.

348 { /* Append the contents of buf to res, then release and reinitialise buf */
349  char end = charset[charsetlen]; /* save the byte that follows the charset name */
350  charset[charsetlen] = '\0'; /* temporarily NUL-terminate the charset name in place */
352  charset[charsetlen] = end; /* restore the saved byte; NOTE(review): listing lines 351 and 353 are absent from this extract, so the work done around this statement is not visible here */
354  mutt_buffer_addstr(res, buf->data);
355  FREE(&buf->data);
356  mutt_buffer_init(buf); /* leave buf ready for reuse by the caller */
357 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:748
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
char * data
Pointer to data.
Definition: buffer.h:35
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:54
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:424
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:46
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:81
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char* decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
s – String to decode
len – Length of the string
enc – Encoding type
Return values
ptr – Decoded string
NULL – Base64 decoding failed
Note
The caller must free the returned string

Definition at line 368 of file rfc2047.c.

369 { /* Decode the encoded text s of length len as quoted-printable or base64 and return a newly allocated NUL-terminated string */
370  const char *it = s;
371  const char *end = s + len;
372 
373  if (enc == ENC_QUOTED_PRINTABLE)
374  {
375  struct Buffer buf = mutt_buffer_make(0);
376  for (; it < end; it++)
377  {
378  if (*it == '_')
379  {
380  mutt_buffer_addch(&buf, ' '); /* an underscore stands for a space */
381  }
382  else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) && /* NOTE(review): it[1] and it[2] are read without comparing against end; presumably relies on s[len] being a NUL - confirm against the caller */
383  (!(it[2] & ~127) && (hexval(it[2]) != -1)))
384  {
385  mutt_buffer_addch(&buf, (hexval(it[1]) << 4) | hexval(it[2])); /* = plus two hex digits becomes one byte */
386  it += 2;
387  }
388  else
389  {
390  mutt_buffer_addch(&buf, *it); /* anything else, including a malformed escape, is copied through */
391  }
392  }
393  mutt_buffer_addch(&buf, '\0');
394  return buf.data; /* the buffer storage is handed to the caller */
395  }
396  else if (enc == ENC_BASE64)
397  {
398  const int olen = 3 * len / 4 + 1; /* three output bytes per four input characters, plus one for the NUL written below */
399  char *out = mutt_mem_malloc(olen);
400  int dlen = mutt_b64_decode(it, out, olen); /* len is not passed to the decoder, only the output size */
401  if (dlen == -1)
402  {
403  FREE(&out);
404  return NULL; /* invalid base64 input */
405  }
406  out[dlen] = '\0';
407  return out;
408  }
409 
410  assert(0); /* The enc parameter has an invalid value */
411  return NULL;
412 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
Base-64 encoded text.
Definition: mime.h:52
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
char * data
Pointer to data.
Definition: buffer.h:35
#define hexval(ch)
Definition: mime.h:75
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
Quoted-printable text.
Definition: mime.h:51
#define FREE(x)
Definition: memory.h:40
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:136
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

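Parameters
[in] d – String to convert
[in] dlen – Length of string
[in] col – Starting column to convert
[in] fromcode – Original encoding
[in] charsets – List of allowable encodings (colon separated)
[out] e – Encoded string
[out] elen – Length of encoded string
[in] specials – Special characters to be encoded
Return values
0 – Success
1 – The string could not be converted from fromcode to UTF-8
2 – No charset from charsets could be chosen for the string
A result is stored in e in all three cases.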

Definition at line 426 of file rfc2047.c.

428 { /* Find the region of d that needs encoding, pick a target charset, and emit that region as one or more encoded words joined by line breaks */
429  int rc = 0;
430  char *buf = NULL;
431  size_t bufpos, buflen;
432  char *t0 = NULL, *t1 = NULL, *t = NULL;
433  char *s0 = NULL, *s1 = NULL;
434  size_t ulen, r, wlen = 0;
435  encoder_t encoder = NULL;
436  char *tocode1 = NULL;
437  const char *tocode = NULL;
438  const char *icode = "utf-8";
439 
440  /* Try to convert to UTF-8. */
441  char *u = mutt_str_substr_dup(d, d + dlen);
442  if (mutt_ch_convert_string(&u, fromcode, icode, 0) != 0)
443  {
444  rc = 1; /* conversion to UTF-8 failed; processing continues with u as it is */
445  icode = 0; /* a cleared icode makes try_block and encode_block skip iconv */
446  }
447  ulen = mutt_str_strlen(u);
448 
449  /* Find earliest and latest things we must encode. */
450  s0 = 0;
451  s1 = 0;
452  t0 = 0;
453  t1 = 0;
454  for (t = u; t < (u + ulen); t++)
455  {
456  if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1))))) /* high-bit bytes, or an encoded-word opener at the start or after whitespace */
457  {
458  if (!t0)
459  t0 = t;
460  t1 = t;
461  }
462  else if (specials && *t && strchr(specials, *t))
463  {
464  if (!s0)
465  s0 = t;
466  s1 = t;
467  }
468  }
469 
470  /* If we have something to encode, include RFC822 specials */
471  if (t0 && s0 && (s0 < t0))
472  t0 = s0;
473  if (t1 && s1 && (s1 > t1))
474  t1 = s1;
475 
476  if (!t0)
477  {
478  /* No encoding is required. */
479  *e = u; /* u is handed to the caller rather than freed */
480  *elen = ulen;
481  return rc;
482  }
483 
484  /* Choose target charset. */
485  tocode = fromcode;
486  if (icode)
487  {
488  tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
489  if (tocode1)
490  tocode = tocode1;
491  else
492  {
493  rc = 2; /* no charset could be chosen; fall back to fromcode without conversion */
494  icode = 0;
495  }
496  }
497 
498  /* Hack to avoid labelling 8-bit data as us-ascii. */
499  if (!icode && mutt_ch_is_us_ascii(tocode))
500  tocode = "unknown-8bit";
501 
502  /* Adjust t0 for maximum length of line. */
503  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
504  if (t < u)
505  t = u;
506  if (t < t0)
507  t0 = t;
508 
509  /* Adjust t0 until we can encode a character after a space. */
510  for (; t0 > u; t0--)
511  {
512  if (!HSPACE(*(t0 - 1)))
513  continue;
514  t = t0 + 1;
515  if (icode)
516  while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
517  t++;
518  if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
519  ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
520  {
521  break;
522  }
523  }
524 
525  /* Adjust t1 until we can encode a character before a space. */
526  for (; t1 < (u + ulen); t1++)
527  {
528  if (!HSPACE(*t1))
529  continue;
530  t = t1 - 1;
531  if (icode)
532  while (CONTINUATION_BYTE(*t))
533  t--;
534  if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
535  ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
536  {
537  break;
538  }
539  }
540 
541  /* We shall encode the region [t0,t1). */
542 
543  /* Initialise the output buffer with the us-ascii prefix. */
544  buflen = 2 * ulen; /* initial size only; the buffer is grown below when a word does not fit */
545  buf = mutt_mem_malloc(buflen);
546  bufpos = t0 - u;
547  memcpy(buf, u, t0 - u);
548 
549  col += t0 - u;
550 
551  t = t0;
552  while (true)
553  {
554  /* Find how much we can encode. */
555  size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
556  if (n == (t1 - t))
557  {
558  /* See if we can fit the us-ascii suffix, too. */
559  if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
560  break;
561  n = t1 - t - 1;
562  if (icode)
563  while (CONTINUATION_BYTE(t[n]))
564  n--;
565  if (n == 0)
566  {
567  /* This should only happen in the really stupid case where the
568  * only word that needs encoding is one character long, but
569  * there is too much us-ascii stuff after it to use a single
570  * encoded word. We add the next word to the encoded region
571  * and try again. */
572  assert(t1 < (u + ulen));
573  for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
574  ;
575  continue;
576  }
577  n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
578  }
579 
580  /* Add to output buffer. */
581  const char *line_break = "\n\t";
582  const int lb_len = 2; /* strlen(line_break) */
583 
584  if ((bufpos + wlen + lb_len) > buflen)
585  {
586  buflen = bufpos + wlen + lb_len;
587  mutt_mem_realloc(&buf, buflen);
588  }
589  r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
590  assert(r == wlen);
591  bufpos += wlen;
592  memcpy(buf + bufpos, line_break, lb_len);
593  bufpos += lb_len;
594 
595  col = 1; /* the next word follows the newline and tab written above */
596 
597  t += n;
598  }
599 
600  /* Add last encoded word and us-ascii suffix to buffer. */
601  buflen = bufpos + wlen + (u + ulen - t1);
602  mutt_mem_realloc(&buf, buflen + 1); /* one extra byte for the NUL written below */
603  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
604  assert(r == wlen);
605  bufpos += wlen;
606  memcpy(buf + bufpos, t1, u + ulen - t1);
607 
608  FREE(&tocode1);
609  FREE(&u);
610 
611  buf[buflen] = '\0';
612 
613  *e = buf;
614  *elen = buflen + 1; /* NOTE(review): this length counts the terminating NUL, whereas the no-encoding path above reports ulen without it */
615  return rc;
616 }
int mutt_ch_convert_string(char **ps, const char *from, const char *to, int flags)
Convert a string between encodings.
Definition: charset.c:748
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1030
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:50
size_t mutt_str_strlen(const char *a)
Calculate the length of a string, safely.
Definition: string.c:666
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
typedef encoder_t - Prototype for an encoding function
Definition: rfc2047.c:60
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:196
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:107
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
#define HSPACE(ch)
Definition: rfc2047.c:48
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:279
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:45
#define FREE(x)
Definition: memory.h:40
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:46
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:317
char * mutt_str_substr_dup(const char *begin, const char *end)
Duplicate a sub-string.
Definition: string.c:579
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const char *  charsets 
)

RFC-2047-encode a string.

Parameters
[in,out] pd – String to be encoded, and resulting encoded string
[in] specials – Special characters to be encoded
[in] col – Starting column of the string (limits the length of the first encoded word)
[in] charsets – List of charsets to choose from

Definition at line 625 of file rfc2047.c.

626 { /* Replace *pd with the result of encode(); does nothing when C_Charset, pd or *pd is unset */
627  if (!C_Charset || !pd || !*pd)
628  return;
629 
630  if (!charsets)
631  charsets = "utf-8"; /* default target charset list when none is given */
632 
633  char *e = NULL;
634  size_t elen = 0;
635  encode(*pd, strlen(*pd), col, C_Charset, charsets, &e, &elen, specials); /* the return code and elen are not used afterwards */
636 
637  FREE(pd); /* release the original string before installing the new one */
638  *pd = e;
639 }
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:426
#define FREE(x)
Definition: memory.h:40
char * C_Charset
Config: Default character set for displaying text on screen.
Definition: charset.c:54
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd – String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 649 of file rfc2047.c.

650 { /* Walk *pd, copying plain text and decoding each encoded word, then replace *pd with the assembled result */
651  if (!pd || !*pd)
652  return;
653 
654  struct Buffer buf = mutt_buffer_make(0); /* Output buffer */
655  char *s = *pd; /* Read pointer */
656  char *beg = NULL; /* Begin of encoded word */
657  enum ContentEncoding enc; /* ENC_BASE64 or ENC_QUOTED_PRINTABLE */
658  char *charset = NULL; /* Which charset */
659  size_t charsetlen; /* Length of the charset */
660  char *text = NULL; /* Encoded text */
661  size_t textlen; /* Length of encoded text */
662 
663  /* Keep some state in case the next decoded word is using the same charset
664  * and it happens to be split in the middle of a multibyte character.
665  * See https://github.com/neomutt/neomutt/issues/1015 */
666  struct Buffer prev = mutt_buffer_make(0); /* Previously decoded word */
667  char *prev_charset = NULL; /* Previously used charset */
668  size_t prev_charsetlen = 0; /* Length of the previously used charset */
669 
670  while (*s)
671  {
672  beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
673  if (beg != s)
674  {
675  /* Some non-encoded text was found */
676  size_t holelen = beg ? beg - s : mutt_str_strlen(s); /* up to the next encoded word, or the rest of the string */
677 
678  /* Ignore whitespace between encoded words */
679  if (beg && (mutt_str_lws_len(s, holelen) == holelen))
680  {
681  s = beg;
682  continue;
683  }
684 
685  /* If we have some previously decoded text, add it now */
686  if (!mutt_buffer_is_empty(&prev))
687  {
688  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
689  }
690 
691  /* Add non-encoded part */
692  {
693  if (C_AssumedCharset)
694  {
695  char *conv = mutt_str_substr_dup(s, s + holelen);
697  mutt_buffer_addstr(&buf, conv); /* NOTE(review): listing line 696 is absent from this extract, so any processing of conv before this call is not visible here */
698  FREE(&conv);
699  }
700  else
701  {
702  mutt_buffer_addstr_n(&buf, s, holelen);
703  }
704  }
705  s += holelen;
706  }
707  if (beg)
708  {
709  /* Some encoded text was found */
710  text[textlen] = '\0'; /* overwrites the first byte of the closing delimiter inside *pd; NOTE(review): this byte is never restored */
711  char *decoded = decode_word(text, textlen, enc);
712  if (!decoded)
713  {
714  return; /* NOTE(review): returns without releasing buf or prev, which may hold data from earlier iterations, and *pd keeps the NUL bytes written above */
715  }
716  if (prev.data && ((prev_charsetlen != charsetlen) ||
717  (strncmp(prev_charset, charset, charsetlen) != 0)))
718  {
719  /* Different charset, convert the previous chunk and add it to the
720  * final result */
721  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
722  }
723 
724  mutt_buffer_addstr(&prev, decoded); /* accumulate in prev so adjacent words with the same charset are finalized together */
725  FREE(&decoded);
726  prev_charset = charset;
727  prev_charsetlen = charsetlen;
728  s = text + textlen + 2; /* Skip final ?= */
729  }
730  }
731 
732  /* Save the last chunk */
733  if (prev.data)
734  {
735  finalize_chunk(&buf, &prev, prev_charset, prev_charsetlen);
736  }
737 
738  mutt_buffer_addch(&buf, '\0');
739  FREE(pd); /* release the input string and hand the output buffer storage to the caller */
740  *pd = buf.data;
741 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:53
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:368
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:139
size_t mutt_str_strlen(const char *a)
Calculate the length of a string, safely.
Definition: string.c:666
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:815
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:99
char * data
Pointer to data.
Definition: buffer.h:35
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:240
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:347
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:301
#define FREE(x)
Definition: memory.h:40
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:252
char * mutt_str_substr_dup(const char *begin, const char *end)
Duplicate a sub-string.
Definition: string.c:579
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al   AddressList
tag  Header tag (used for wrapping calculation)

Definition at line 748 of file rfc2047.c.

749 {
750  if (!al)
751  return;
752 
753  int col = tag ? strlen(tag) + 2 : 32;
754  struct Address *a = NULL;
755  TAILQ_FOREACH(a, al, entries)
756  {
757  if (a->personal)
759  else if (a->group && a->mailbox)
761  }
762 }
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:719
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:42
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: email_globals.c:38
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:625
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al   AddressList

Definition at line 768 of file rfc2047.c.

769 {
770  if (!al)
771  return;
772 
773  struct Address *a = NULL;
774  TAILQ_FOREACH(a, al, entries)
775  {
776  if (a->personal && ((strstr(a->personal, "=?")) || C_AssumedCharset))
777  {
779  }
780  else if (a->group && a->mailbox && strstr(a->mailbox, "=?"))
781  rfc2047_decode(&a->mailbox);
782  }
783 }
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
Definition: charset.c:53
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:719
An email address.
Definition: address.h:34
char * mailbox
Mailbox and host address.
Definition: address.h:37
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:649
char * personal
Real name of address.
Definition: address.h:36
bool group
Group mailbox?
Definition: address.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 789 of file rfc2047.c.

790 {
791  if (!env)
792  return;
801  rfc2047_decode(&env->x_label);
802  rfc2047_decode(&env->subject);
803 }
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:768
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:649
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:56
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env  Envelope

Definition at line 809 of file rfc2047.c.

810 {
811  if (!env)
812  return;
813  rfc2047_encode_addrlist(&env->from, "From");
814  rfc2047_encode_addrlist(&env->to, "To");
815  rfc2047_encode_addrlist(&env->cc, "Cc");
816  rfc2047_encode_addrlist(&env->bcc, "Bcc");
817  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
818  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
819  rfc2047_encode_addrlist(&env->sender, "Sender");
820  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), C_SendCharset);
821  rfc2047_encode(&env->subject, NULL, sizeof("Subject:"), C_SendCharset);
822 }
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:63
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:62
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:60
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:748
char * C_SendCharset
Config: Character sets for outgoing mail.
Definition: email_globals.c:38
struct AddressList from
Email's 'From' list.
Definition: envelope.h:57
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:59
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:625
char * subject
Email's subject.
Definition: envelope.h:66
struct AddressList to
Email's 'To' list.
Definition: envelope.h:58
struct AddressList sender
Email's sender.
Definition: envelope.h:61
char * x_label
X-Label.
Definition: envelope.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function: