#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "globals.h"
#include "mime.h"
#include "mutt_globals.h"
Go to the source code of this file.
|
typedef size_t(* | encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode) |
| Prototype for an encoding function. More...
|
|
|
static size_t | b_encoder (char *str, const char *buf, size_t buflen, const char *tocode) |
| Base64 Encode a string - Implements encoder_t. More...
|
|
static size_t | q_encoder (char *str, const char *buf, size_t buflen, const char *tocode) |
| Quoted-printable Encode a string - Implements encoder_t. More...
|
|
static char * | parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen) |
| Parse a string and report RFC2047 elements. More...
|
|
static size_t | try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen) |
| Attempt to convert a block of text. More...
|
|
static size_t | encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder) |
| Encode a block of text using an encoder. More...
|
|
static size_t | choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen) |
| Calculate how much data can be converted. More...
|
|
static void | finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen) |
| Perform charset conversion and filtering. More...
|
|
static char * | decode_word (const char *s, size_t len, enum ContentEncoding enc) |
| Decode an RFC2047-encoded string. More...
|
|
static int | encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials) |
| RFC2047-encode a string. More...
|
|
void | rfc2047_encode (char **pd, const char *specials, int col, const char *charsets) |
| RFC-2047-encode a string. More...
|
|
void | rfc2047_decode (char **pd) |
| Decode any RFC2047-encoded header fields. More...
|
|
void | rfc2047_encode_addrlist (struct AddressList *al, const char *tag) |
| Encode any RFC2047 headers, where required, in an Address list. More...
|
|
void | rfc2047_decode_addrlist (struct AddressList *al) |
| Decode any RFC2047 headers in an Address list. More...
|
|
void | rfc2047_decode_envelope (struct Envelope *env) |
| Decode the fields of an Envelope. More...
|
|
void | rfc2047_encode_envelope (struct Envelope *env) |
| Encode the fields of an Envelope. More...
|
|
RFC2047 MIME extensions encoding / decoding routines
- Authors
- Michael R. Elkins
- Edmund Grimley Evans
- Pietro Cerutti
- Copyright
- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
Definition in file rfc2047.c.
◆ ENCWORD_LEN_MAX
#define ENCWORD_LEN_MAX 75 |
◆ ENCWORD_LEN_MIN
#define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */ |
◆ HSPACE
#define HSPACE(ch) (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
◆ CONTINUATION_BYTE
#define CONTINUATION_BYTE(ch) (((ch) & 0xc0) == 0x80)
◆ encoder_t
typedef size_t(* encoder_t) (char *str, const char *buf, size_t buflen, const char *tocode) |
Prototype for an encoding function.
- Parameters
-
str | Buffer for the encoded result |
buf | String to encode |
buflen | Length of the string to encode |
tocode | Character encoding |
- Return values
-
num | Bytes written to buffer |
Definition at line 61 of file rfc2047.c.
◆ b_encoder()
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
Definition at line 66 of file rfc2047.c.
72 memcpy(str, tocode, strlen(tocode));
73 str += strlen(tocode);
74 memcpy(str,
"?B?", 3);
81 size_t in_len =
MIN(3, buflen);
84 for (
size_t i = 0; i < ret; i++)
◆ q_encoder()
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
Definition at line 99 of file rfc2047.c.
101 static const char hex[] =
"0123456789ABCDEF";
104 memcpy(str,
"=?", 2);
106 memcpy(str, tocode, strlen(tocode));
107 str += strlen(tocode);
108 memcpy(str,
"?Q?", 3);
112 unsigned char c = *buf++;
115 else if ((c >= 0x7f) || (c < 0x20) || (c ==
'_') || strchr(
MimeSpecials, c))
118 *str++ = hex[(c & 0xf0) >> 4];
119 *str++ = hex[c & 0x0f];
124 memcpy(str,
"?=", 2);
◆ parse_encoded_word()
static char *parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
- Parameters
-
[in] | str | String to parse |
[out] | enc | Content encoding found in the first RFC2047 word |
[out] | charset | Charset found in the first RFC2047 word |
[out] | charsetlen | Length of the charset string found |
[out] | text | Start of the first RFC2047 encoded text |
[out] | textlen | Length of the encoded text found |
- Return values
-
ptr | Start of the RFC2047 encoded word |
NULL | None was found |
Definition at line 140 of file rfc2047.c.
◆ try_block()
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
- Parameters
-
d | String to convert |
dlen | Length of string |
fromcode | Original encoding |
tocode | New encoding |
encoder | Encoding function |
wlen | Number of characters converted |
- Return values
-
0 | Success, string converted |
>0 | Error, upper bound on the number of bytes that could be converted |
If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.
The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.
Definition at line 183 of file rfc2047.c.
187 const char *ib = NULL;
190 int count, len, len_b, len_q;
195 assert(cd != (iconv_t)(-1));
199 obl =
sizeof(buf) - strlen(tocode);
200 if ((iconv(cd, (ICONV_CONST
char **) &ib, &ibl, &ob, &obl) == (
size_t)(-1)) ||
201 (iconv(cd, NULL, NULL, &ob, &obl) == (size_t)(-1)))
203 assert(errno == E2BIG);
206 return ((ib - d) == dlen) ? dlen : ib - d + 1;
212 if (dlen > (
sizeof(buf) - strlen(tocode)))
213 return sizeof(buf) - strlen(tocode) + 1;
214 memcpy(buf, d, dlen);
219 for (
char *p = buf; p < ob; p++)
221 unsigned char c = *p;
223 if ((c >= 0x7f) || (c < 0x20) || (*p ==
'_') ||
231 len_b = len + (((ob - buf) + 2) / 3) * 4;
232 len_q = len + (ob - buf) + 2 * count;
◆ encode_block()
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
- Parameters
-
str | Buffer for the encoded result |
buf | String to convert |
buflen | Length of the string to convert |
fromcode | Original encoding |
tocode | New encoding |
encoder | Encoding function |
- Return values
-
num | Length of the encoded word |
Encode the data (buf, buflen) into str using the encoder.
Definition at line 266 of file rfc2047.c.
271 return (*encoder)(str, buf, buflen, tocode);
275 assert(cd != (iconv_t)(-1));
276 const char *ib = buf;
280 size_t obl =
sizeof(tmp) - strlen(tocode);
281 const size_t n1 = iconv(cd, (ICONV_CONST
char **) &ib, &ibl, &ob, &obl);
282 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
283 assert(n1 != (
size_t)(-1) && n2 != (
size_t)(-1));
285 return (*encoder)(str, tmp, ob - tmp, tocode);
◆ choose_block()
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
- Parameters
-
d | String to convert |
dlen | Length of string |
col | Starting column to convert |
fromcode | Original encoding |
tocode | New encoding |
encoder | Encoding function |
wlen | Length of the encoded word |
- Return values
-
num | Bytes that can be converted |
Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.
Definition at line 304 of file rfc2047.c.
313 const size_t nn =
try_block(d, n, fromcode, tocode, encoder, wlen);
314 if ((nn == 0) && (((col + *wlen) <= (
ENCWORD_LEN_MAX + 1)) || (n <= 1)))
316 n = ((nn != 0) ? nn : n) - 1;
◆ finalize_chunk()
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
- Parameters
-
[out] | res | Buffer where the resulting string is appended |
[in] | buf | Buffer with the input string |
[in] | charset | Charset to use for the conversion |
[in] | charsetlen | Length of the charset parameter |
The buffer buf is reinitialized at the end of this function.
Definition at line 334 of file rfc2047.c.
338 char end = charset[charsetlen];
339 charset[charsetlen] =
'\0';
341 charset[charsetlen] = end;
◆ decode_word()
static char *decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
- Parameters
-
s | String to decode |
len | Length of the string |
enc | Encoding type |
- Return values
-
ptr | Decoded string |
- Note
- The caller must free the returned string
Definition at line 357 of file rfc2047.c.
360 const char *end = s + len;
365 for (; it < end; it++)
371 else if ((it[0] ==
'=') && (!(it[1] & ~127) && (
hexval(it[1]) != -1)) &&
372 (!(it[2] & ~127) && (
hexval(it[2]) != -1)))
387 const int olen = 3 * len / 4 + 1;
◆ encode()
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
- Parameters
-
[in] | d | String to convert |
[in] | dlen | Length of string |
[in] | col | Starting column to convert |
[in] | fromcode | Original encoding |
[in] | charsets | List of allowable encodings (colon separated) |
[out] | e | Encoded string |
[out] | elen | Length of encoded string |
[in] | specials | Special characters to be encoded |
- Return values
-
Definition at line 415 of file rfc2047.c.
420 size_t bufpos, buflen;
421 char *t0 = NULL, *t1 = NULL, *t = NULL;
422 char *s0 = NULL, *s1 = NULL;
423 size_t ulen, r, wlen = 0;
425 char *tocode1 = NULL;
426 const char *tocode = NULL;
427 const char *icode =
"utf-8";
443 for (t = u; t < (u + ulen); t++)
445 if ((*t & 0x80) || ((*t ==
'=') && (t[1] ==
'?') && ((t == u) ||
HSPACE(*(t - 1)))))
451 else if (specials && *t && strchr(specials, *t))
460 if (t0 && s0 && (s0 < t0))
462 if (t1 && s1 && (s1 > t1))
489 tocode =
"unknown-8bit";
507 if ((
try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
515 for (; t1 < (u + ulen); t1++)
523 if ((
try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
536 memcpy(buf, u, t0 - u);
544 size_t n =
choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
561 assert(t1 < (u + ulen));
562 for (t1++; (t1 < (u + ulen)) && !
HSPACE(*t1); t1++)
567 n =
choose_block(t, n, col, icode, tocode, &encoder, &wlen);
571 const char *line_break =
"\n\t";
572 const int lb_len = 2;
574 if ((bufpos + wlen + lb_len) > buflen)
576 buflen = bufpos + wlen + lb_len;
579 r =
encode_block(buf + bufpos, t, n, icode, tocode, encoder);
582 memcpy(buf + bufpos, line_break, lb_len);
591 buflen = bufpos + wlen + (u + ulen - t1);
593 r =
encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
596 memcpy(buf + bufpos, t1, u + ulen - t1);
◆ rfc2047_encode()
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
- Parameters
-
[in,out] | pd | String to be encoded, and resulting encoded string |
[in] | specials | Special characters to be encoded |
[in] | col | Starting column of the string in the header line (limits the length of the first encoded word) |
[in] | charsets | List of charsets to choose from |
Definition at line 615 of file rfc2047.c.
625 encode(*pd, strlen(*pd), col,
C_Charset, charsets, &e, &elen, specials);
◆ rfc2047_decode()
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
- Parameters
-
[in,out] | pd | String to be decoded, and resulting decoded string |
Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.
Definition at line 639 of file rfc2047.c.
648 char *charset = NULL;
657 char *prev_charset = NULL;
658 size_t prev_charsetlen = 0;
700 text[textlen] =
'\0';
706 if (prev.
data && ((prev_charsetlen != charsetlen) ||
716 prev_charset = charset;
717 prev_charsetlen = charsetlen;
718 s = text + textlen + 2;
◆ rfc2047_encode_addrlist()
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
- Parameters
-
al | AddressList |
tag | Header tag (used for wrapping calculation) |
Definition at line 738 of file rfc2047.c.
743 int col = tag ? strlen(tag) + 2 : 32;
◆ rfc2047_decode_addrlist()
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
- Parameters
-
al | AddressList |
Definition at line 758 of file rfc2047.c.
◆ rfc2047_decode_envelope()
void rfc2047_decode_envelope(struct Envelope *env)
◆ rfc2047_encode_envelope()
void rfc2047_encode_envelope(struct Envelope *env)
char * subject
Email's subject.
struct AddressList bcc
Email's 'Bcc' list.
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
#define CONTINUATION_BYTE(ch)
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
static size_t q_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t.
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
char * personal
Real name of address.
String manipulation buffer.
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
bool mutt_buffer_is_empty(const struct Buffer *buf)
Is the Buffer empty?
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
#define MUTT_ICONV_NO_FLAGS
No flags are set.
size_t(* encoder_t)(char *str, const char *buf, size_t buflen, const char *tocode)
Prototype for an encoding function.
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
ContentEncoding
Content-Transfer-Encoding.
#define TAILQ_FOREACH(var, head, field)
struct Buffer * mutt_buffer_init(struct Buffer *buf)
Initialise a new Buffer.
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
const char MimeSpecials[]
Characters that need special treatment in MIME.
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
size_t mutt_buffer_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
struct AddressList reply_to
Email's 'reply-to'.
const char AddressSpecials[]
Characters with special meaning for email addresses.
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
struct AddressList cc
Email's 'Cc' list.
size_t mutt_buffer_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
size_t mutt_b64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Convert raw bytes to null-terminated base64 string.
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
void rfc2047_encode(char **pd, const char *specials, int col, const char *charsets)
RFC-2047-encode a string.
#define mutt_ch_is_us_ascii(str)
struct AddressList to
Email's 'To' list.
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
char * C_SendCharset
Config: Character sets for outgoing mail.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
static size_t b_encoder(char *str, const char *buf, size_t buflen, const char *tocode)
Base64 Encode a string - Implements encoder_t.
char * mailbox
Mailbox and host address.
struct AddressList from
Email's 'From' list.
@ ENC_BASE64
Base-64 encoded text.
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
struct AddressList return_path
Return path for the Email.
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
char * C_Charset
Config: Default character set for displaying text on screen.
char * data
Pointer to data.
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
struct AddressList sender
Email's sender.
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)