NeoMutt  2023-03-22-27-g3cb248
Teaching an old dog new tricks
DOXYGEN
charset.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <ctype.h>
31#include <errno.h>
32#include <iconv.h>
33#include <langinfo.h>
34#include <limits.h>
35#include <stdbool.h>
36#include <stdio.h>
37#include <string.h>
38#include "charset.h"
39#include "lib.h"
40#include "memory.h"
41#include "queue.h"
42#include "regex3.h"
43#include "string2.h"
44#ifdef ENABLE_NLS
45#include <libintl.h>
46#endif
47
48#ifndef EILSEQ
49#define EILSEQ EINVAL
50#endif
51
/**
 * ReplacementChar - When a Unicode character can't be displayed, use this instead
 *
 * Starts out as '?'; mutt_ch_set_charset() changes it to 0xfffd when the
 * charset is UTF-8 and back to '?' otherwise.
 */
wchar_t ReplacementChar = '?';

/**
 * CharsetIsUtf8 - Is the user's current character set utf-8?
 *
 * Kept up to date by mutt_ch_set_charset().
 */
bool CharsetIsUtf8 = false;
61
67struct Lookup
68{
70 struct Regex regex;
72 TAILQ_ENTRY(Lookup) entries;
73};
74TAILQ_HEAD(LookupList, Lookup);
75
76static struct LookupList Lookups = TAILQ_HEAD_INITIALIZER(Lookups);
77
/**
 * struct MimeNames - MIME name lookup entry
 */
struct MimeNames
{
  const char *key;  ///< Charset name to match (compared case-insensitively)
  const char *pref; ///< Preferred name to use in its place
};
86
97const struct MimeNames PreferredMimeNames[] = {
98 // clang-format off
99 { "ansi_x3.4-1968", "us-ascii" },
100 { "iso-ir-6", "us-ascii" },
101 { "iso_646.irv:1991", "us-ascii" },
102 { "ascii", "us-ascii" },
103 { "iso646-us", "us-ascii" },
104 { "us", "us-ascii" },
105 { "ibm367", "us-ascii" },
106 { "cp367", "us-ascii" },
107 { "csASCII", "us-ascii" },
108
109 { "csISO2022KR", "iso-2022-kr" },
110 { "csEUCKR", "euc-kr" },
111 { "csISO2022JP", "iso-2022-jp" },
112 { "csISO2022JP2", "iso-2022-jp-2" },
113
114 { "ISO_8859-1:1987", "iso-8859-1" },
115 { "iso-ir-100", "iso-8859-1" },
116 { "iso_8859-1", "iso-8859-1" },
117 { "latin1", "iso-8859-1" },
118 { "l1", "iso-8859-1" },
119 { "IBM819", "iso-8859-1" },
120 { "CP819", "iso-8859-1" },
121 { "csISOLatin1", "iso-8859-1" },
122
123 { "ISO_8859-2:1987", "iso-8859-2" },
124 { "iso-ir-101", "iso-8859-2" },
125 { "iso_8859-2", "iso-8859-2" },
126 { "latin2", "iso-8859-2" },
127 { "l2", "iso-8859-2" },
128 { "csISOLatin2", "iso-8859-2" },
129
130 { "ISO_8859-3:1988", "iso-8859-3" },
131 { "iso-ir-109", "iso-8859-3" },
132 { "ISO_8859-3", "iso-8859-3" },
133 { "latin3", "iso-8859-3" },
134 { "l3", "iso-8859-3" },
135 { "csISOLatin3", "iso-8859-3" },
136
137 { "ISO_8859-4:1988", "iso-8859-4" },
138 { "iso-ir-110", "iso-8859-4" },
139 { "ISO_8859-4", "iso-8859-4" },
140 { "latin4", "iso-8859-4" },
141 { "l4", "iso-8859-4" },
142 { "csISOLatin4", "iso-8859-4" },
143
144 { "ISO_8859-6:1987", "iso-8859-6" },
145 { "iso-ir-127", "iso-8859-6" },
146 { "iso_8859-6", "iso-8859-6" },
147 { "ECMA-114", "iso-8859-6" },
148 { "ASMO-708", "iso-8859-6" },
149 { "arabic", "iso-8859-6" },
150 { "csISOLatinArabic", "iso-8859-6" },
151
152 { "ISO_8859-7:1987", "iso-8859-7" },
153 { "iso-ir-126", "iso-8859-7" },
154 { "ISO_8859-7", "iso-8859-7" },
155 { "ELOT_928", "iso-8859-7" },
156 { "ECMA-118", "iso-8859-7" },
157 { "greek", "iso-8859-7" },
158 { "greek8", "iso-8859-7" },
159 { "csISOLatinGreek", "iso-8859-7" },
160
161 { "ISO_8859-8:1988", "iso-8859-8" },
162 { "iso-ir-138", "iso-8859-8" },
163 { "ISO_8859-8", "iso-8859-8" },
164 { "hebrew", "iso-8859-8" },
165 { "csISOLatinHebrew", "iso-8859-8" },
166
167 { "ISO_8859-5:1988", "iso-8859-5" },
168 { "iso-ir-144", "iso-8859-5" },
169 { "ISO_8859-5", "iso-8859-5" },
170 { "cyrillic", "iso-8859-5" },
171 { "csISOLatinCyrillic", "iso-8859-5" },
172
173 { "ISO_8859-9:1989", "iso-8859-9" },
174 { "iso-ir-148", "iso-8859-9" },
175 { "ISO_8859-9", "iso-8859-9" },
176 { "latin5", "iso-8859-9" }, /* this is not a bug */
177 { "l5", "iso-8859-9" },
178 { "csISOLatin5", "iso-8859-9" },
179
180 { "ISO_8859-10:1992", "iso-8859-10" },
181 { "iso-ir-157", "iso-8859-10" },
182 { "latin6", "iso-8859-10" }, /* this is not a bug */
183 { "l6", "iso-8859-10" },
184 { "csISOLatin6", "iso-8859-10" },
185
186 { "csKOI8r", "koi8-r" },
187
188 { "MS_Kanji", "Shift_JIS" }, /* Note the underscore! */
189 { "csShiftJis", "Shift_JIS" },
190
191 { "Extended_UNIX_Code_Packed_Format_for_Japanese",
192 "euc-jp" },
193 { "csEUCPkdFmtJapanese", "euc-jp" },
194
195 { "csGB2312", "gb2312" },
196 { "csbig5", "big5" },
197
198 /* End of official brain damage.
199 * What follows has been taken from glibc's localedata files. */
200
201 { "iso_8859-13", "iso-8859-13" },
202 { "iso-ir-179", "iso-8859-13" },
203 { "latin7", "iso-8859-13" }, /* this is not a bug */
204 { "l7", "iso-8859-13" },
205
206 { "iso_8859-14", "iso-8859-14" },
207 { "latin8", "iso-8859-14" }, /* this is not a bug */
208 { "l8", "iso-8859-14" },
209
210 { "iso_8859-15", "iso-8859-15" },
211 { "latin9", "iso-8859-15" }, /* this is not a bug */
212
213 /* Suggested by Ionel Mugurel Ciobica <tgakic@sg10.chem.tue.nl> */
214 { "latin0", "iso-8859-15" }, /* this is not a bug */
215
216 { "iso_8859-16", "iso-8859-16" },
217 { "latin10", "iso-8859-16" }, /* this is not a bug */
218
219 { "646", "us-ascii" },
220
221 /* http://www.sun.com/software/white-papers/wp-unicode/ */
222
223 { "eucJP", "euc-jp" },
224 { "PCK", "Shift_JIS" },
225 { "ko_KR-euc", "euc-kr" },
226 { "zh_TW-big5", "big5" },
227
228 /* seems to be common on some systems */
229
230 { "sjis", "Shift_JIS" },
231 { "euc-jp-ms", "eucJP-ms" },
232
233 /* If you happen to encounter system-specific brain-damage with respect to
234 * character set naming, please add it above this comment, and submit a patch
235 * to <neomutt-devel@neomutt.org> */
236
237 { NULL, NULL },
238 // clang-format on
239};
240
245static struct Lookup *lookup_new(void)
246{
247 return mutt_mem_calloc(1, sizeof(struct Lookup));
248}
249
254static void lookup_free(struct Lookup **ptr)
255{
256 if (!ptr || !*ptr)
257 return;
258
259 struct Lookup *l = *ptr;
260 FREE(&l->replacement);
261 FREE(&l->regex.pattern);
262 if (l->regex.regex)
263 regfree(l->regex.regex);
264 FREE(&l->regex.regex);
265 FREE(&l->regex);
266
267 FREE(ptr);
268}
269
279static const char *lookup_charset(enum LookupType type, const char *cs)
280{
281 if (!cs)
282 return NULL;
283
284 struct Lookup *l = NULL;
285
286 TAILQ_FOREACH(l, &Lookups, entries)
287 {
288 if (l->type != type)
289 continue;
290 if (mutt_regex_match(&l->regex, cs))
291 return l->replacement;
292 }
293 return NULL;
294}
295
307int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset,
308 const char *charset, char **ps)
309{
310 if (!ps)
311 return -1;
312
313 char *u = *ps;
314 const size_t ulen = mutt_str_len(u);
315 if (ulen == 0)
316 return 0;
317
318 const struct ListNode *np = NULL;
319 STAILQ_FOREACH(np, &assumed_charset->head, entries)
320 {
321 char const *c = np->data;
322 size_t n = mutt_str_len(c);
323 char *fromcode = mutt_mem_malloc(n + 1);
324 mutt_str_copy(fromcode, c, n + 1);
325 char *s = mutt_strn_dup(u, ulen);
326 int m = mutt_ch_convert_string(&s, fromcode, charset, MUTT_ICONV_NO_FLAGS);
327 FREE(&fromcode);
328 if (m == 0)
329 {
330 FREE(ps);
331 *ps = s;
332 return 0;
333 }
334 FREE(&s);
335 }
337 charset, MUTT_ICONV_HOOK_FROM);
338 return -1;
339}
340
350void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
351{
352 if (!buf || !name)
353 return;
354
355 char in[1024], scratch[1024 + 10];
356
357 mutt_str_copy(in, name, sizeof(in));
358 char *ext = strchr(in, '/');
359 if (ext)
360 *ext++ = '\0';
361
362 if (mutt_istr_equal(in, "utf-8") || mutt_istr_equal(in, "utf8"))
363 {
364 mutt_str_copy(buf, "utf-8", buflen);
365 goto out;
366 }
367
368 /* catch some common iso-8859-something misspellings */
369 size_t plen;
370 if ((plen = mutt_istr_startswith(in, "8859")) && (in[plen] != '-'))
371 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
372 else if ((plen = mutt_istr_startswith(in, "8859-")))
373 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
374 else if ((plen = mutt_istr_startswith(in, "iso8859")) && (in[plen] != '-'))
375 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
376 else if ((plen = mutt_istr_startswith(in, "iso8859-")))
377 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
378 else
379 mutt_str_copy(scratch, in, sizeof(scratch));
380
381 for (size_t i = 0; PreferredMimeNames[i].key; i++)
382 {
383 if (mutt_istr_equal(scratch, PreferredMimeNames[i].key))
384 {
385 mutt_str_copy(buf, PreferredMimeNames[i].pref, buflen);
386 goto out;
387 }
388 }
389
390 mutt_str_copy(buf, scratch, buflen);
391
392 /* for cosmetics' sake, transform to lowercase. */
393 for (char *p = buf; *p; p++)
394 *p = tolower(*p);
395
396out:
397 if (ext && *ext)
398 {
399 mutt_str_cat(buf, buflen, "/");
400 mutt_str_cat(buf, buflen, ext);
401 }
402}
403
/**
 * mutt_ch_chscmp - Are the names of two character sets equivalent?
 * @param cs1 First character set (canonicalised before comparing)
 * @param cs2 Second character set (used as given)
 * @retval true  The shorter name is a case-insensitive prefix of the longer one
 * @retval false Names differ, or either argument is NULL
 */
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
{
  if (!cs1 || !cs2)
    return false;

  char buf[256] = { 0 };
  mutt_ch_canonical_charset(buf, sizeof(buf), cs1);

  const int len1 = mutt_str_len(buf);
  const int len2 = mutt_str_len(cs2);

  /* Compare only as many characters as the shorter name has */
  const char *longer = cs2;
  const char *shorter = buf;
  int num = len1;
  if (len1 > len2)
  {
    longer = buf;
    shorter = cs2;
    num = len2;
  }

  return mutt_istrn_equal(longer, shorter, num);
}
431
439const char *mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
440{
441 static char fcharset[128];
442 const char *c = NULL;
443
444 if (assumed_charset && (assumed_charset->count > 0))
445 c = STAILQ_FIRST(&assumed_charset->head)->data;
446 else
447 c = "us-ascii";
448
449 mutt_str_copy(fcharset, c, sizeof(fcharset));
450 return fcharset;
451}
452
/**
 * mutt_ch_get_langinfo_charset - Get the user's choice of character set
 * @retval ptr Charset string, as returned by mutt_str_dup()
 *
 * The name comes from nl_langinfo(CODESET), canonicalised.  If that yields an
 * empty name, "iso-8859-1" is returned instead.
 *
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller -- confirm against mutt_str_dup().
 */
char *mutt_ch_get_langinfo_charset(void)
{
  char buf[1024] = { 0 };

  mutt_ch_canonical_charset(buf, sizeof(buf), nl_langinfo(CODESET));

  if (buf[0] != '\0')
    return mutt_str_dup(buf);

  return mutt_str_dup("iso-8859-1");
}
471
483bool mutt_ch_lookup_add(enum LookupType type, const char *pat,
484 const char *replace, struct Buffer *err)
485{
486 if (!pat || !replace)
487 return false;
488
489 regex_t *rx = mutt_mem_calloc(1, sizeof(regex_t));
490 int rc = REG_COMP(rx, pat, REG_ICASE);
491 if (rc != 0)
492 {
493 regerror(rc, rx, err->data, err->dsize);
494 FREE(&rx);
495 return false;
496 }
497
498 struct Lookup *l = lookup_new();
499 l->type = type;
500 l->replacement = mutt_str_dup(replace);
501 l->regex.pattern = mutt_str_dup(pat);
502 l->regex.regex = rx;
503 l->regex.pat_not = false;
504
505 TAILQ_INSERT_TAIL(&Lookups, l, entries);
506
507 return true;
508}
509
516{
517 struct Lookup *l = NULL;
518 struct Lookup *tmp = NULL;
519
520 TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
521 {
522 TAILQ_REMOVE(&Lookups, l, entries);
523 lookup_free(&l);
524 }
525}
526
536const char *mutt_ch_charset_lookup(const char *chs)
537{
539}
540
563iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
564{
565 char tocode1[128];
566 char fromcode1[128];
567 const char *tocode2 = NULL, *fromcode2 = NULL;
568 const char *tmp = NULL;
569
570 iconv_t cd;
571
572 /* transform to MIME preferred charset names */
573 mutt_ch_canonical_charset(tocode1, sizeof(tocode1), tocode);
574 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
575
576 /* maybe apply charset-hooks and recanonicalise fromcode,
577 * but only when caller asked us to sanitize a potentially wrong
578 * charset name incoming from the wild exterior. */
579 if (flags & MUTT_ICONV_HOOK_FROM)
580 {
581 tmp = mutt_ch_charset_lookup(fromcode1);
582 if (tmp)
583 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
584 }
585
586 /* always apply iconv-hooks to suit system's iconv tastes */
587 tocode2 = mutt_ch_iconv_lookup(tocode1);
588 tocode2 = tocode2 ? tocode2 : tocode1;
589 fromcode2 = mutt_ch_iconv_lookup(fromcode1);
590 fromcode2 = fromcode2 ? fromcode2 : fromcode1;
591
592 /* call system iconv with names it appreciates */
593 cd = iconv_open(tocode2, fromcode2);
594 if (cd != (iconv_t) -1)
595 return cd;
596
597 return (iconv_t) -1;
598}
599
616size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
617 char **outbuf, size_t *outbytesleft, const char **inrepls,
618 const char *outrepl, int *iconverrno)
619{
620 size_t rc = 0;
621 const char *ib = *inbuf;
622 size_t ibl = *inbytesleft;
623 char *ob = *outbuf;
624 size_t obl = *outbytesleft;
625
626 while (true)
627 {
628 errno = 0;
629 const size_t ret1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
630 if (ret1 != (size_t) -1)
631 rc += ret1;
632 if (iconverrno)
633 *iconverrno = errno;
634
635 if (ibl && obl && (errno == EILSEQ))
636 {
637 if (inrepls)
638 {
639 /* Try replacing the input */
640 const char **t = NULL;
641 for (t = inrepls; *t; t++)
642 {
643 const char *ib1 = *t;
644 size_t ibl1 = strlen(*t);
645 char *ob1 = ob;
646 size_t obl1 = obl;
647 iconv(cd, (ICONV_CONST char **) &ib1, &ibl1, &ob1, &obl1);
648 if (ibl1 == 0)
649 {
650 ib++;
651 ibl--;
652 ob = ob1;
653 obl = obl1;
654 rc++;
655 break;
656 }
657 }
658 if (*t)
659 continue;
660 }
661 /* Replace the output */
662 if (!outrepl)
663 outrepl = "?";
664 iconv(cd, NULL, NULL, &ob, &obl);
665 if (obl)
666 {
667 int n = strlen(outrepl);
668 if (n > obl)
669 {
670 outrepl = "?";
671 n = 1;
672 }
673 memcpy(ob, outrepl, n);
674 ib++;
675 ibl--;
676 ob += n;
677 obl -= n;
678 rc++;
679 iconv(cd, NULL, NULL, NULL, NULL); /* for good measure */
680 continue;
681 }
682 }
683 *inbuf = ib;
684 *inbytesleft = ibl;
685 *outbuf = ob;
686 *outbytesleft = obl;
687 return rc;
688 }
689}
690
700const char *mutt_ch_iconv_lookup(const char *chs)
701{
703}
704
715int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
716{
717 if (!s || !from || !to)
718 return -1;
719
720 int rc = 0;
721 iconv_t cd = mutt_ch_iconv_open(to, from, MUTT_ICONV_NO_FLAGS);
722 if (cd == (iconv_t) -1)
723 return -1;
724
725 size_t outlen = MB_LEN_MAX * slen;
726 char *out = mutt_mem_malloc(outlen + 1);
727 char *saved_out = out;
728
729 const size_t convlen = iconv(cd, (ICONV_CONST char **) &s, &slen, &out, &outlen);
730 if (convlen == (size_t) -1)
731 rc = errno;
732
733 FREE(&saved_out);
734 iconv_close(cd);
735 return rc;
736}
737
/**
 * mutt_ch_convert_string - Convert a string between encodings
 * @param[in,out] ps    String to convert; replaced by the converted string
 * @param[in]     from  Current character set
 * @param[in]     to    Target character set
 * @param[in]     flags Flags, e.g. #MUTT_ICONV_HOOK_FROM
 * @retval 0     Success, or the string was NULL/empty
 * @retval -1    Bad arguments, string too long, or iconv could not be opened
 * @retval errno errno left by the final iconv() call inside mutt_ch_iconv()
 *
 * Unconvertible characters are substituted: U+FFFD when the target is UTF-8,
 * a list of input replacements when the source is UTF-8, "?" otherwise.
 */
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
{
  if (!ps)
    return -1;

  char *src = *ps;
  if (!src || (src[0] == '\0'))
    return 0;

  if (!from || !to)
    return -1;

  iconv_t cd = mutt_ch_iconv_open(to, from, flags);
  if (cd == (iconv_t) -1)
    return -1;

  /* Pick the replacement strategy for unconvertible characters */
  const char *repls[] = { "\357\277\275", "?", 0 };
  const char **inrepls = NULL;
  const char *outrepl = NULL;
  if (mutt_ch_is_utf8(to))
    outrepl = "\357\277\275";
  else if (mutt_ch_is_utf8(from))
    inrepls = repls;
  else
    outrepl = "?";

  size_t in_left = strlen(src);
  if (in_left >= (SIZE_MAX / MB_LEN_MAX))
  {
    /* The output buffer size would overflow */
    iconv_close(cd);
    return -1;
  }

  size_t out_left = MB_LEN_MAX * in_left;
  char *dst = mutt_mem_malloc(out_left + 1);
  const char *in = src;
  char *out = dst;
  int rc = 0;

  mutt_ch_iconv(cd, &in, &in_left, &out, &out_left, inrepls, outrepl, &rc);
  iconv(cd, 0, 0, &out, &out_left);
  iconv_close(cd);

  *out = '\0';

  /* Swap the caller's string for the converted one, then shrink it to fit */
  FREE(ps);
  *ps = dst;
  mutt_str_adjust(ps);

  return rc;
}
804
816bool mutt_ch_check_charset(const char *cs, bool strict)
817{
818 if (!cs)
819 return false;
820
821 if (mutt_ch_is_utf8(cs))
822 return true;
823
824 if (!strict)
825 {
826 for (int i = 0; PreferredMimeNames[i].key; i++)
827 {
828 if (mutt_istr_equal(PreferredMimeNames[i].key, cs) ||
830 {
831 return true;
832 }
833 }
834 }
835
836 iconv_t cd = mutt_ch_iconv_open(cs, cs, MUTT_ICONV_NO_FLAGS);
837 if (cd != (iconv_t) (-1))
838 {
839 iconv_close(cd);
840 return true;
841 }
842
843 return false;
844}
845
856struct FgetConv *mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
857{
858 struct FgetConv *fc = NULL;
859 iconv_t cd = (iconv_t) -1;
860
861 if (from && to)
862 cd = mutt_ch_iconv_open(to, from, flags);
863
864 if (cd != (iconv_t) -1)
865 {
866 static const char *repls[] = { "\357\277\275", "?", 0 };
867
868 fc = mutt_mem_malloc(sizeof(struct FgetConv));
869 fc->p = fc->bufo;
870 fc->ob = fc->bufo;
871 fc->ib = fc->bufi;
872 fc->ibl = 0;
873 fc->inrepls = mutt_ch_is_utf8(to) ? repls : repls + 1;
874 }
875 else
876 {
877 fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
878 }
879 fc->fp = fp;
880 fc->cd = cd;
881 return fc;
882}
883
889{
890 if (!fc || !*fc)
891 return;
892
893 if ((*fc)->cd != (iconv_t) -1)
894 iconv_close((*fc)->cd);
895 FREE(fc);
896}
897
909{
910 if (!fc)
911 return EOF;
912 if (fc->cd == (iconv_t) -1)
913 return fgetc(fc->fp);
914 if (!fc->p)
915 return EOF;
916 if (fc->p < fc->ob)
917 return (unsigned char) *(fc->p)++;
918
919 /* Try to convert some more */
920 fc->p = fc->bufo;
921 fc->ob = fc->bufo;
922 if (fc->ibl)
923 {
924 size_t obl = sizeof(fc->bufo);
925 iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
926 if (fc->p < fc->ob)
927 return (unsigned char) *(fc->p)++;
928 }
929
930 /* If we trusted iconv a bit more, we would at this point
931 * ask why it had stopped converting ... */
932
933 /* Try to read some more */
934 if ((fc->ibl == sizeof(fc->bufi)) ||
935 (fc->ibl && (fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))))
936 {
937 fc->p = 0;
938 return EOF;
939 }
940 if (fc->ibl)
941 memcpy(fc->bufi, fc->ib, fc->ibl);
942 fc->ib = fc->bufi;
943 fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->fp);
944
945 /* Try harder this time to convert some */
946 if (fc->ibl)
947 {
948 size_t obl = sizeof(fc->bufo);
949 mutt_ch_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl,
950 fc->inrepls, 0, NULL);
951 if (fc->p < fc->ob)
952 return (unsigned char) *(fc->p)++;
953 }
954
955 /* Either the file has finished or one of the buffers is too small */
956 fc->p = 0;
957 return EOF;
958}
959
/**
 * mutt_ch_fgetconvs - Convert a file's charset into a string buffer
 * @param buf    Buffer for result
 * @param buflen Length of buffer
 * @param fc     FgetConv handle
 * @retval ptr  `buf`, holding at least one converted byte
 * @retval NULL Nothing was read, or `buf` is NULL / `buflen` is 0
 *
 * Reads converted bytes until a newline has been stored, the buffer is full,
 * or mutt_ch_fgetconv() returns EOF.  The result is always NUL-terminated
 * when there is room for at least the terminator.
 */
char *mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
{
  /* A zero-length buffer has no room even for the terminator */
  if (!buf || (buflen == 0))
    return NULL;

  size_t r;
  for (r = 0; (r + 1) < buflen;)
  {
    const int c = mutt_ch_fgetconv(fc);
    if (c == EOF)
      break;
    buf[r++] = (char) c;
    if (c == '\n')
      break;
  }
  buf[r] = '\0';

  if (r > 0)
    return buf;

  return NULL;
}
992
1003void mutt_ch_set_charset(const char *charset)
1004{
1005 char buf[256] = { 0 };
1006
1007 mutt_ch_canonical_charset(buf, sizeof(buf), charset);
1008
1009 if (mutt_ch_is_utf8(buf))
1010 {
1011 CharsetIsUtf8 = true;
1012 ReplacementChar = 0xfffd; /* replacement character */
1013 }
1014 else
1015 {
1016 CharsetIsUtf8 = false;
1017 ReplacementChar = '?';
1018 }
1019
1020#if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1021 bind_textdomain_codeset(PACKAGE, buf);
1022#endif
1023}
1024
1036char *mutt_ch_choose(const char *fromcode, const struct Slist *charsets,
1037 const char *u, size_t ulen, char **d, size_t *dlen)
1038{
1039 if (!fromcode || !charsets)
1040 return NULL;
1041
1042 char *e = NULL, *tocode = NULL;
1043 size_t elen = 0, bestn = 0;
1044
1045 const struct ListNode *np = NULL;
1046 STAILQ_FOREACH(np, &charsets->head, entries)
1047 {
1048 char *t = mutt_str_dup(np->data);
1049 if (!t)
1050 continue;
1051
1052 size_t n = mutt_str_len(t);
1053 char *s = mutt_strn_dup(u, ulen);
1054 const int rc = d ? mutt_ch_convert_string(&s, fromcode, t, MUTT_ICONV_NO_FLAGS) :
1055 mutt_ch_check(s, ulen, fromcode, t);
1056 if (rc)
1057 {
1058 FREE(&t);
1059 FREE(&s);
1060 continue;
1061 }
1062 size_t slen = mutt_str_len(s);
1063
1064 if (!tocode || (n < bestn))
1065 {
1066 bestn = n;
1067 FREE(&tocode);
1068 tocode = t;
1069 if (d)
1070 {
1071 FREE(&e);
1072 e = s;
1073 }
1074 else
1075 {
1076 FREE(&s);
1077 }
1078 elen = slen;
1079 }
1080 else
1081 {
1082 FREE(&t);
1083 FREE(&s);
1084 }
1085 }
1086 if (tocode)
1087 {
1088 if (d)
1089 *d = e;
1090 if (dlen)
1091 *dlen = elen;
1092
1093 char canonical_buf[1024] = { 0 };
1094 mutt_ch_canonical_charset(canonical_buf, sizeof(canonical_buf), tocode);
1095 mutt_str_replace(&tocode, canonical_buf);
1096 }
1097 return tocode;
1098}
static char * chs
Definition: gnupgparse.c:73
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
#define MIN(a, b)
Definition: memory.h:31
bool mutt_ch_check_charset(const char *cs, bool strict)
Does iconv understand a character set?
Definition: charset.c:816
void mutt_ch_fgetconv_close(struct FgetConv **fc)
Close an fgetconv handle.
Definition: charset.c:888
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
Definition: charset.c:616
void mutt_ch_lookup_remove(void)
Remove all the character set lookups.
Definition: charset.c:515
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1036
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:307
static struct LookupList Lookups
Definition: charset.c:76
char * mutt_ch_get_langinfo_charset(void)
Get the user's choice of character set.
Definition: charset.c:460
const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
Definition: charset.c:97
bool mutt_ch_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
Add a new character set lookup.
Definition: charset.c:483
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:350
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
Definition: charset.c:700
TAILQ_HEAD(LookupList, Lookup)
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:751
void mutt_ch_set_charset(const char *charset)
Update the records for a new character set.
Definition: charset.c:1003
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:60
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
Definition: charset.c:279
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
Definition: charset.c:715
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
Definition: charset.c:536
static struct Lookup * lookup_new(void)
Create a new Lookup.
Definition: charset.c:245
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
Definition: charset.c:908
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
Definition: charset.c:254
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:55
#define EILSEQ
Definition: charset.c:49
struct FgetConv * mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
Prepare a file for charset conversion.
Definition: charset.c:856
char * mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
Convert a file's charset into a string buffer.
Definition: charset.c:970
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
Are the names of two character sets equivalent?
Definition: charset.c:416
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:563
const char * mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
Get the default character set.
Definition: charset.c:439
Conversion between different character encodings.
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
#define mutt_ch_is_utf8(str)
Definition: charset.h:95
LookupType
Types of character set lookups.
Definition: charset.h:66
@ MUTT_LOOKUP_ICONV
Character set conversion.
Definition: charset.h:68
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
Definition: charset.h:67
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:635
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:451
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:819
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
Definition: string.c:370
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:567
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:652
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
Definition: string.c:239
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
Definition: string.c:524
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
Definition: string.c:326
char * mutt_str_cat(char *buf, size_t buflen, const char *s)
Concatenate two strings.
Definition: string.c:265
static size_t plen
Length of cached packet.
Definition: pgppacket.c:39
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:735
#define STAILQ_FIRST(head)
Definition: queue.h:350
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:809
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define TAILQ_REMOVE(head, elm, field)
Definition: queue.h:841
#define TAILQ_HEAD_INITIALIZER(head)
Definition: queue.h:637
#define TAILQ_ENTRY(type)
Definition: queue.h:640
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
Key value store.
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
A dummy converter.
Definition: charset.h:57
Cursor for converting a file's encoding.
Definition: charset.h:41
char bufi[512]
Definition: charset.h:44
iconv_t cd
Definition: charset.h:43
char bufo[512]
Definition: charset.h:45
size_t ibl
Definition: charset.h:49
FILE * fp
Definition: charset.h:42
char * p
Definition: charset.h:46
const char ** inrepls
Definition: charset.h:50
char * ib
Definition: charset.h:48
char * ob
Definition: charset.h:47
A List node for strings.
Definition: list.h:35
char * data
String.
Definition: list.h:36
Regex to String lookup table.
Definition: charset.c:68
char * replacement
Alternative charset to use.
Definition: charset.c:71
enum LookupType type
Lookup type.
Definition: charset.c:69
struct Regex regex
Regular expression.
Definition: charset.c:70
MIME name lookup entry.
Definition: charset.c:82
const char * key
Definition: charset.c:83
const char * pref
Definition: charset.c:84
Cached regular expression.
Definition: regex3.h:89
char * pattern
printable version
Definition: regex3.h:90
bool pat_not
do not match
Definition: regex3.h:92
regex_t * regex
compiled expression
Definition: regex3.h:91
String list.
Definition: slist.h:47
struct ListHead head
List containing values.
Definition: slist.h:48
size_t count
Number of values in list.
Definition: slist.h:49