Go to the documentation of this file.
103 {
"ansi_x3.4-1968",
"us-ascii" },
104 {
"iso-ir-6",
"us-ascii" },
105 {
"iso_646.irv:1991",
"us-ascii" },
106 {
"ascii",
"us-ascii" },
107 {
"iso646-us",
"us-ascii" },
108 {
"us",
"us-ascii" },
109 {
"ibm367",
"us-ascii" },
110 {
"cp367",
"us-ascii" },
111 {
"csASCII",
"us-ascii" },
113 {
"csISO2022KR",
"iso-2022-kr" },
114 {
"csEUCKR",
"euc-kr" },
115 {
"csISO2022JP",
"iso-2022-jp" },
116 {
"csISO2022JP2",
"iso-2022-jp-2" },
118 {
"ISO_8859-1:1987",
"iso-8859-1" },
119 {
"iso-ir-100",
"iso-8859-1" },
120 {
"iso_8859-1",
"iso-8859-1" },
121 {
"latin1",
"iso-8859-1" },
122 {
"l1",
"iso-8859-1" },
123 {
"IBM819",
"iso-8859-1" },
124 {
"CP819",
"iso-8859-1" },
125 {
"csISOLatin1",
"iso-8859-1" },
127 {
"ISO_8859-2:1987",
"iso-8859-2" },
128 {
"iso-ir-101",
"iso-8859-2" },
129 {
"iso_8859-2",
"iso-8859-2" },
130 {
"latin2",
"iso-8859-2" },
131 {
"l2",
"iso-8859-2" },
132 {
"csISOLatin2",
"iso-8859-2" },
134 {
"ISO_8859-3:1988",
"iso-8859-3" },
135 {
"iso-ir-109",
"iso-8859-3" },
136 {
"ISO_8859-3",
"iso-8859-3" },
137 {
"latin3",
"iso-8859-3" },
138 {
"l3",
"iso-8859-3" },
139 {
"csISOLatin3",
"iso-8859-3" },
141 {
"ISO_8859-4:1988",
"iso-8859-4" },
142 {
"iso-ir-110",
"iso-8859-4" },
143 {
"ISO_8859-4",
"iso-8859-4" },
144 {
"latin4",
"iso-8859-4" },
145 {
"l4",
"iso-8859-4" },
146 {
"csISOLatin4",
"iso-8859-4" },
148 {
"ISO_8859-6:1987",
"iso-8859-6" },
149 {
"iso-ir-127",
"iso-8859-6" },
150 {
"iso_8859-6",
"iso-8859-6" },
151 {
"ECMA-114",
"iso-8859-6" },
152 {
"ASMO-708",
"iso-8859-6" },
153 {
"arabic",
"iso-8859-6" },
154 {
"csISOLatinArabic",
"iso-8859-6" },
156 {
"ISO_8859-7:1987",
"iso-8859-7" },
157 {
"iso-ir-126",
"iso-8859-7" },
158 {
"ISO_8859-7",
"iso-8859-7" },
159 {
"ELOT_928",
"iso-8859-7" },
160 {
"ECMA-118",
"iso-8859-7" },
161 {
"greek",
"iso-8859-7" },
162 {
"greek8",
"iso-8859-7" },
163 {
"csISOLatinGreek",
"iso-8859-7" },
165 {
"ISO_8859-8:1988",
"iso-8859-8" },
166 {
"iso-ir-138",
"iso-8859-8" },
167 {
"ISO_8859-8",
"iso-8859-8" },
168 {
"hebrew",
"iso-8859-8" },
169 {
"csISOLatinHebrew",
"iso-8859-8" },
171 {
"ISO_8859-5:1988",
"iso-8859-5" },
172 {
"iso-ir-144",
"iso-8859-5" },
173 {
"ISO_8859-5",
"iso-8859-5" },
174 {
"cyrillic",
"iso-8859-5" },
175 {
"csISOLatinCyrillic",
"iso-8859-5" },
177 {
"ISO_8859-9:1989",
"iso-8859-9" },
178 {
"iso-ir-148",
"iso-8859-9" },
179 {
"ISO_8859-9",
"iso-8859-9" },
180 {
"latin5",
"iso-8859-9" },
181 {
"l5",
"iso-8859-9" },
182 {
"csISOLatin5",
"iso-8859-9" },
184 {
"ISO_8859-10:1992",
"iso-8859-10" },
185 {
"iso-ir-157",
"iso-8859-10" },
186 {
"latin6",
"iso-8859-10" },
187 {
"l6",
"iso-8859-10" },
188 {
"csISOLatin6",
"iso-8859-10" },
190 {
"csKOI8r",
"koi8-r" },
192 {
"MS_Kanji",
"Shift_JIS" },
193 {
"csShiftJis",
"Shift_JIS" },
195 {
"Extended_UNIX_Code_Packed_Format_for_Japanese",
"euc-jp" },
197 {
"csEUCPkdFmtJapanese",
"euc-jp" },
199 {
"csGB2312",
"gb2312" },
200 {
"csbig5",
"big5" },
205 {
"iso_8859-13",
"iso-8859-13" },
206 {
"iso-ir-179",
"iso-8859-13" },
207 {
"latin7",
"iso-8859-13" },
208 {
"l7",
"iso-8859-13" },
210 {
"iso_8859-14",
"iso-8859-14" },
211 {
"latin8",
"iso-8859-14" },
212 {
"l8",
"iso-8859-14" },
214 {
"iso_8859-15",
"iso-8859-15" },
215 {
"latin9",
"iso-8859-15" },
218 {
"latin0",
"iso-8859-15" },
220 {
"iso_8859-16",
"iso-8859-16" },
221 {
"latin10",
"iso-8859-16" },
223 {
"646",
"us-ascii" },
227 {
"eucJP",
"euc-jp" },
228 {
"PCK",
"Shift_JIS" },
229 {
"ko_KR-euc",
"euc-kr" },
230 {
"zh_TW-big5",
"big5" },
234 {
"sjis",
"Shift_JIS" },
235 {
"euc-jp-ms",
"eucJP-ms" },
319 const char *c1 = NULL;
357 char in[1024], scratch[1024];
360 char *ext = strchr(in,
'/');
373 snprintf(scratch,
sizeof(scratch),
"iso-8859-%s", in +
plen);
375 snprintf(scratch,
sizeof(scratch),
"iso-8859-%s", in +
plen);
377 snprintf(scratch,
sizeof(scratch),
"iso_8859-%s", in +
plen);
379 snprintf(scratch,
sizeof(scratch),
"iso_8859-%s", in +
plen);
395 for (
char *p = buf; *p; p++)
431 ((len1 > len2) ? cs2 : buf),
MIN(len1, len2));
442 static char fcharset[128];
444 const char *c1 = NULL;
449 mutt_str_copy(fcharset, c, c1 ? (c1 - c + 1) :
sizeof(fcharset));
452 return strcpy(fcharset,
"us-ascii");
464 char buf[1024] = { 0 };
486 const char *replace,
struct Buffer *err)
488 if (!pat || !replace)
492 int rc =
REG_COMP(rx, pat, REG_ICASE);
520 struct Lookup *tmp = NULL;
569 const char *tocode2 = NULL, *fromcode2 = NULL;
570 const char *tmp = NULL;
590 tocode2 = tocode2 ? tocode2 : tocode1;
592 fromcode2 = fromcode2 ? fromcode2 : fromcode1;
595 cd = iconv_open(tocode2, fromcode2);
596 if (cd != (iconv_t) -1)
619 char **outbuf,
size_t *outbytesleft,
const char **inrepls,
620 const char *outrepl,
int *iconverrno)
623 const char *ib = *inbuf;
624 size_t ibl = *inbytesleft;
626 size_t obl = *outbytesleft;
631 const size_t ret1 = iconv(cd, (ICONV_CONST
char **) &ib, &ibl, &ob, &obl);
632 if (ret1 != (
size_t) -1)
637 if (ibl && obl && (errno ==
EILSEQ))
642 const char **t = NULL;
643 for (t = inrepls; *t; t++)
645 const char *ib1 = *t;
646 size_t ibl1 = strlen(*t);
649 iconv(cd, (ICONV_CONST
char **) &ib1, &ibl1, &ob1, &obl1);
666 iconv(cd, NULL, NULL, &ob, &obl);
669 int n = strlen(outrepl);
675 memcpy(ob, outrepl, n);
681 iconv(cd, NULL, NULL, NULL, NULL);
717 int mutt_ch_check(
const char *s,
size_t slen,
const char *from,
const char *to)
719 if (!s || !from || !to)
724 if (cd == (iconv_t) -1)
727 size_t outlen = MB_LEN_MAX * slen;
729 char *saved_out = out;
731 const size_t convlen =
732 iconv(cd, (ICONV_CONST
char **) &s, &slen, &out, (
size_t *) &outlen);
761 if (!s || (*s ==
'\0'))
767 const char *repls[] = {
"\357\277\275",
"?", 0 };
771 if (cd == (iconv_t) -1)
775 const char *ib = NULL;
776 char *buf = NULL, *ob = NULL;
778 const char **inrepls = NULL;
779 const char *outrepl = NULL;
782 outrepl =
"\357\277\275";
791 obl = MB_LEN_MAX * ibl;
795 mutt_ch_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl, &rc);
839 if (cd != (iconv_t)(-1))
861 iconv_t
cd = (iconv_t) -1;
866 if (
cd != (iconv_t) -1)
868 static const char *repls[] = {
"\357\277\275",
"?", 0 };
893 if ((*fc)->cd != (iconv_t) -1)
894 iconv_close((*fc)->cd);
912 if (fc->
cd == (iconv_t) -1)
913 return fgetc(fc->
fp);
917 return (
unsigned char) *(fc->
p)++;
924 size_t obl =
sizeof(fc->
bufo);
925 iconv(fc->
cd, (ICONV_CONST
char **) &fc->
ib, &fc->
ibl, &fc->
ob, &obl);
927 return (
unsigned char) *(fc->
p)++;
934 if ((fc->
ibl ==
sizeof(fc->
bufi)) ||
948 size_t obl =
sizeof(fc->
bufo);
952 return (
unsigned char) *(fc->
p)++;
976 for (r = 0; (r + 1) < buflen;)
1020 #if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1021 bind_textdomain_codeset(PACKAGE, buf);
1037 size_t ulen,
char **d,
size_t *dlen)
1042 char *e = NULL, *tocode = NULL;
1043 size_t elen = 0, bestn = 0;
1044 const char *q = NULL;
1046 for (
const char *
p = charsets;
p;
p = q ? q + 1 : 0)
1050 size_t n = q ? q -
p : strlen(
p);
1069 if (!tocode || (n < bestn))
1096 char canonical_buf[1024];
void mutt_ch_set_charset(const char *charset)
Update the records for a new character set.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
String manipulation buffer.
#define MUTT_ICONV_NO_FLAGS
No flags are set.
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
Are the names of two character sets equivalent?
#define TAILQ_FOREACH(var, head, field)
Cached regular expression.
bool mutt_ch_check_charset(const char *cs, bool strict)
Does iconv understand a character set?
char * mutt_str_dup(const char *str)
Copy a string, safely.
#define TAILQ_INSERT_TAIL(head, elm, field)
size_t dsize
Length of data.
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
char * mutt_ch_get_default_charset(void)
Get the default character set.
static struct Lookup * lookup_new(void)
Create a new Lookup.
#define TAILQ_ENTRY(type)
struct Regex regex
Regular expression.
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
TAILQ_HEAD(LookupList, Lookup)
#define TAILQ_HEAD_INITIALIZER(head)
#define TAILQ_REMOVE(head, elm, field)
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
void mutt_ch_lookup_remove(void)
Remove all the character set lookups.
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
char * mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
Convert a file's charset into a string buffer.
const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
@ MUTT_LOOKUP_ICONV
Character set conversion.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand for mutt_regex_capture()
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
char * mutt_str_cat(char *buf, size_t buflen, const char *s)
Concatenate two strings.
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
void mutt_ch_fgetconv_close(struct FgetConv **fc)
Close an fgetconv handle.
char * replacement
Alternative charset to use.
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
enum LookupType type
Lookup type.
#define mutt_ch_is_utf8(str)
Regex to String lookup table.
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
char * C_AssumedCharset
Config: If a message is missing a character set, assume this character set.
regex_t * regex
compiled expression
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
LookupType
Types of character set lookups.
static size_t plen
Length of cached packet.
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
char * pattern
printable version
char * mutt_ch_get_langinfo_charset(void)
Get the user's choice of character set.
bool CharsetIsUtf8
Is the user's current character set utf-8?
struct FgetConv * mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
Prepare a file for charset conversion.
char * C_Charset
Config: Default character set for displaying text on screen.
Cursor for converting a file's encoding.
char * data
Pointer to data.
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
bool mutt_ch_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
Add a new character set lookup.
char * mutt_ch_choose(const char *fromcode, const char *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
static struct LookupList Lookups
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)