NeoMutt  2022-04-29-145-g9b6a0e
Teaching an old dog new tricks
DOXYGEN
charset.c
Go to the documentation of this file.
1 
29 #include "config.h"
30 #include <ctype.h>
31 #include <errno.h>
32 #include <iconv.h>
33 #include <langinfo.h>
34 #include <limits.h>
35 #include <stdbool.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include "config/lib.h"
39 #include "core/lib.h"
40 #include "charset.h"
41 #include "lib.h"
42 #include "memory.h"
43 #include "queue.h"
44 #include "regex3.h"
45 #include "string2.h"
46 #ifdef ENABLE_NLS
47 #include <libintl.h>
48 #endif
49 
50 #ifndef EILSEQ
51 #define EILSEQ EINVAL
52 #endif
53 
57 wchar_t ReplacementChar = '?';
58 
62 bool CharsetIsUtf8 = false;
63 
69 struct Lookup
70 {
71  enum LookupType type;
72  struct Regex regex;
73  char *replacement;
74  TAILQ_ENTRY(Lookup) entries;
75 };
76 TAILQ_HEAD(LookupList, Lookup);
77 
78 static struct LookupList Lookups = TAILQ_HEAD_INITIALIZER(Lookups);
79 
83 struct MimeNames
84 {
85  const char *key;
86  const char *pref;
87 };
88 
99 const struct MimeNames PreferredMimeNames[] = {
100  // clang-format off
101  { "ansi_x3.4-1968", "us-ascii" },
102  { "iso-ir-6", "us-ascii" },
103  { "iso_646.irv:1991", "us-ascii" },
104  { "ascii", "us-ascii" },
105  { "iso646-us", "us-ascii" },
106  { "us", "us-ascii" },
107  { "ibm367", "us-ascii" },
108  { "cp367", "us-ascii" },
109  { "csASCII", "us-ascii" },
110 
111  { "csISO2022KR", "iso-2022-kr" },
112  { "csEUCKR", "euc-kr" },
113  { "csISO2022JP", "iso-2022-jp" },
114  { "csISO2022JP2", "iso-2022-jp-2" },
115 
116  { "ISO_8859-1:1987", "iso-8859-1" },
117  { "iso-ir-100", "iso-8859-1" },
118  { "iso_8859-1", "iso-8859-1" },
119  { "latin1", "iso-8859-1" },
120  { "l1", "iso-8859-1" },
121  { "IBM819", "iso-8859-1" },
122  { "CP819", "iso-8859-1" },
123  { "csISOLatin1", "iso-8859-1" },
124 
125  { "ISO_8859-2:1987", "iso-8859-2" },
126  { "iso-ir-101", "iso-8859-2" },
127  { "iso_8859-2", "iso-8859-2" },
128  { "latin2", "iso-8859-2" },
129  { "l2", "iso-8859-2" },
130  { "csISOLatin2", "iso-8859-2" },
131 
132  { "ISO_8859-3:1988", "iso-8859-3" },
133  { "iso-ir-109", "iso-8859-3" },
134  { "ISO_8859-3", "iso-8859-3" },
135  { "latin3", "iso-8859-3" },
136  { "l3", "iso-8859-3" },
137  { "csISOLatin3", "iso-8859-3" },
138 
139  { "ISO_8859-4:1988", "iso-8859-4" },
140  { "iso-ir-110", "iso-8859-4" },
141  { "ISO_8859-4", "iso-8859-4" },
142  { "latin4", "iso-8859-4" },
143  { "l4", "iso-8859-4" },
144  { "csISOLatin4", "iso-8859-4" },
145 
146  { "ISO_8859-6:1987", "iso-8859-6" },
147  { "iso-ir-127", "iso-8859-6" },
148  { "iso_8859-6", "iso-8859-6" },
149  { "ECMA-114", "iso-8859-6" },
150  { "ASMO-708", "iso-8859-6" },
151  { "arabic", "iso-8859-6" },
152  { "csISOLatinArabic", "iso-8859-6" },
153 
154  { "ISO_8859-7:1987", "iso-8859-7" },
155  { "iso-ir-126", "iso-8859-7" },
156  { "ISO_8859-7", "iso-8859-7" },
157  { "ELOT_928", "iso-8859-7" },
158  { "ECMA-118", "iso-8859-7" },
159  { "greek", "iso-8859-7" },
160  { "greek8", "iso-8859-7" },
161  { "csISOLatinGreek", "iso-8859-7" },
162 
163  { "ISO_8859-8:1988", "iso-8859-8" },
164  { "iso-ir-138", "iso-8859-8" },
165  { "ISO_8859-8", "iso-8859-8" },
166  { "hebrew", "iso-8859-8" },
167  { "csISOLatinHebrew", "iso-8859-8" },
168 
169  { "ISO_8859-5:1988", "iso-8859-5" },
170  { "iso-ir-144", "iso-8859-5" },
171  { "ISO_8859-5", "iso-8859-5" },
172  { "cyrillic", "iso-8859-5" },
173  { "csISOLatinCyrillic", "iso-8859-5" },
174 
175  { "ISO_8859-9:1989", "iso-8859-9" },
176  { "iso-ir-148", "iso-8859-9" },
177  { "ISO_8859-9", "iso-8859-9" },
178  { "latin5", "iso-8859-9" }, /* this is not a bug */
179  { "l5", "iso-8859-9" },
180  { "csISOLatin5", "iso-8859-9" },
181 
182  { "ISO_8859-10:1992", "iso-8859-10" },
183  { "iso-ir-157", "iso-8859-10" },
184  { "latin6", "iso-8859-10" }, /* this is not a bug */
185  { "l6", "iso-8859-10" },
186  { "csISOLatin6", "iso-8859-10" },
187 
188  { "csKOI8r", "koi8-r" },
189 
190  { "MS_Kanji", "Shift_JIS" }, /* Note the underscore! */
191  { "csShiftJis", "Shift_JIS" },
192 
193  { "Extended_UNIX_Code_Packed_Format_for_Japanese",
194  "euc-jp" },
195  { "csEUCPkdFmtJapanese", "euc-jp" },
196 
197  { "csGB2312", "gb2312" },
198  { "csbig5", "big5" },
199 
200  /* End of official brain damage.
201  * What follows has been taken from glibc's localedata files. */
202 
203  { "iso_8859-13", "iso-8859-13" },
204  { "iso-ir-179", "iso-8859-13" },
205  { "latin7", "iso-8859-13" }, /* this is not a bug */
206  { "l7", "iso-8859-13" },
207 
208  { "iso_8859-14", "iso-8859-14" },
209  { "latin8", "iso-8859-14" }, /* this is not a bug */
210  { "l8", "iso-8859-14" },
211 
212  { "iso_8859-15", "iso-8859-15" },
213  { "latin9", "iso-8859-15" }, /* this is not a bug */
214 
215  /* Suggested by Ionel Mugurel Ciobica <tgakic@sg10.chem.tue.nl> */
216  { "latin0", "iso-8859-15" }, /* this is not a bug */
217 
218  { "iso_8859-16", "iso-8859-16" },
219  { "latin10", "iso-8859-16" }, /* this is not a bug */
220 
221  { "646", "us-ascii" },
222 
223  /* http://www.sun.com/software/white-papers/wp-unicode/ */
224 
225  { "eucJP", "euc-jp" },
226  { "PCK", "Shift_JIS" },
227  { "ko_KR-euc", "euc-kr" },
228  { "zh_TW-big5", "big5" },
229 
230  /* seems to be common on some systems */
231 
232  { "sjis", "Shift_JIS" },
233  { "euc-jp-ms", "eucJP-ms" },
234 
235  /* If you happen to encounter system-specific brain-damage with respect to
236  * character set naming, please add it above this comment, and submit a patch
237  * to <neomutt-devel@neomutt.org> */
238 
239  { NULL, NULL },
240  // clang-format on
241 };
242 
247 static struct Lookup *lookup_new(void)
248 {
249  return mutt_mem_calloc(1, sizeof(struct Lookup));
250 }
251 
256 static void lookup_free(struct Lookup **ptr)
257 {
258  if (!ptr || !*ptr)
259  return;
260 
261  struct Lookup *l = *ptr;
262  FREE(&l->replacement);
263  FREE(&l->regex.pattern);
264  if (l->regex.regex)
265  regfree(l->regex.regex);
266  FREE(&l->regex.regex);
267  FREE(&l->regex);
268 
269  FREE(ptr);
270 }
271 
281 static const char *lookup_charset(enum LookupType type, const char *cs)
282 {
283  if (!cs)
284  return NULL;
285 
286  struct Lookup *l = NULL;
287 
288  TAILQ_FOREACH(l, &Lookups, entries)
289  {
290  if (l->type != type)
291  continue;
292  if (mutt_regex_match(&l->regex, cs))
293  return l->replacement;
294  }
295  return NULL;
296 }
297 
308 {
309  if (!ps)
310  return -1;
311 
312  char *u = *ps;
313  const size_t ulen = mutt_str_len(u);
314  if (ulen == 0)
315  return 0;
316 
317  const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
318  const char *const c_charset = cs_subset_string(NeoMutt->sub, "charset");
319  const struct ListNode *np = NULL;
320  STAILQ_FOREACH(np, &c_assumed_charset->head, entries)
321  {
322  char const *c = np->data;
323  size_t n = mutt_str_len(c);
324  char *fromcode = mutt_mem_malloc(n + 1);
325  mutt_str_copy(fromcode, c, n + 1);
326  char *s = mutt_strn_dup(u, ulen);
327  int m = mutt_ch_convert_string(&s, fromcode, c_charset, MUTT_ICONV_NO_FLAGS);
328  FREE(&fromcode);
329  if (m == 0)
330  {
331  FREE(ps);
332  *ps = s;
333  return 0;
334  }
335  FREE(&s);
336  }
338  c_charset, MUTT_ICONV_HOOK_FROM);
339  return -1;
340 }
341 
351 void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
352 {
353  if (!buf || !name)
354  return;
355 
356  char in[1024], scratch[1024 + 10];
357 
358  mutt_str_copy(in, name, sizeof(in));
359  char *ext = strchr(in, '/');
360  if (ext)
361  *ext++ = '\0';
362 
363  if (mutt_istr_equal(in, "utf-8") || mutt_istr_equal(in, "utf8"))
364  {
365  mutt_str_copy(buf, "utf-8", buflen);
366  goto out;
367  }
368 
369  /* catch some common iso-8859-something misspellings */
370  size_t plen;
371  if ((plen = mutt_istr_startswith(in, "8859")) && (in[plen] != '-'))
372  snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
373  else if ((plen = mutt_istr_startswith(in, "8859-")))
374  snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
375  else if ((plen = mutt_istr_startswith(in, "iso8859")) && (in[plen] != '-'))
376  snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
377  else if ((plen = mutt_istr_startswith(in, "iso8859-")))
378  snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
379  else
380  mutt_str_copy(scratch, in, sizeof(scratch));
381 
382  for (size_t i = 0; PreferredMimeNames[i].key; i++)
383  {
384  if (mutt_istr_equal(scratch, PreferredMimeNames[i].key))
385  {
386  mutt_str_copy(buf, PreferredMimeNames[i].pref, buflen);
387  goto out;
388  }
389  }
390 
391  mutt_str_copy(buf, scratch, buflen);
392 
393  /* for cosmetics' sake, transform to lowercase. */
394  for (char *p = buf; *p; p++)
395  *p = tolower(*p);
396 
397 out:
398  if (ext && *ext)
399  {
400  mutt_str_cat(buf, buflen, "/");
401  mutt_str_cat(buf, buflen, ext);
402  }
403 }
404 
/**
 * mutt_ch_chscmp - Are the names of two character sets equivalent?
 * @param cs1 First character set; canonicalised before comparing
 * @param cs2 Second character set; compared as given
 * @retval true  The names compare equal, ignoring case, over the length of
 *               the shorter of the two
 * @retval false Either name is NULL, or the names differ
 */
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
{
  if (!cs1 || !cs2)
    return false;

  char canon[256];
  mutt_ch_canonical_charset(canon, sizeof(canon), cs1);

  const int canon_len = mutt_str_len(canon);
  const int cs2_len = mutt_str_len(cs2);

  /* Put the longer string first and compare only as many characters as the
   * shorter one has */
  if (canon_len > cs2_len)
    return mutt_istrn_equal(canon, cs2, cs2_len);

  return mutt_istrn_equal(cs2, canon, canon_len);
}
432 
440 {
441  static char fcharset[128];
442  const char *c = NULL;
443  const struct Slist *const c_assumed_charset = cs_subset_slist(NeoMutt->sub, "assumed_charset");
444 
445  if (c_assumed_charset && (c_assumed_charset->count > 0))
446  c = STAILQ_FIRST(&c_assumed_charset->head)->data;
447  else
448  c = "us-ascii";
449 
450  mutt_str_copy(fcharset, c, sizeof(fcharset));
451  return fcharset;
452 }
453 
462 {
463  char buf[1024] = { 0 };
464 
465  mutt_ch_canonical_charset(buf, sizeof(buf), nl_langinfo(CODESET));
466 
467  if (buf[0] != '\0')
468  return mutt_str_dup(buf);
469 
470  return mutt_str_dup("iso-8859-1");
471 }
472 
484 bool mutt_ch_lookup_add(enum LookupType type, const char *pat,
485  const char *replace, struct Buffer *err)
486 {
487  if (!pat || !replace)
488  return false;
489 
490  regex_t *rx = mutt_mem_calloc(1, sizeof(regex_t));
491  int rc = REG_COMP(rx, pat, REG_ICASE);
492  if (rc != 0)
493  {
494  regerror(rc, rx, err->data, err->dsize);
495  FREE(&rx);
496  return false;
497  }
498 
499  struct Lookup *l = lookup_new();
500  l->type = type;
501  l->replacement = mutt_str_dup(replace);
502  l->regex.pattern = mutt_str_dup(pat);
503  l->regex.regex = rx;
504  l->regex.pat_not = false;
505 
506  TAILQ_INSERT_TAIL(&Lookups, l, entries);
507 
508  return true;
509 }
510 
517 {
518  struct Lookup *l = NULL;
519  struct Lookup *tmp = NULL;
520 
521  TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
522  {
523  TAILQ_REMOVE(&Lookups, l, entries);
524  lookup_free(&l);
525  }
526 }
527 
537 const char *mutt_ch_charset_lookup(const char *chs)
538 {
540 }
541 
564 iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
565 {
566  char tocode1[128];
567  char fromcode1[128];
568  const char *tocode2 = NULL, *fromcode2 = NULL;
569  const char *tmp = NULL;
570 
571  iconv_t cd;
572 
573  /* transform to MIME preferred charset names */
574  mutt_ch_canonical_charset(tocode1, sizeof(tocode1), tocode);
575  mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
576 
577  /* maybe apply charset-hooks and recanonicalise fromcode,
578  * but only when caller asked us to sanitize a potentially wrong
579  * charset name incoming from the wild exterior. */
580  if (flags & MUTT_ICONV_HOOK_FROM)
581  {
582  tmp = mutt_ch_charset_lookup(fromcode1);
583  if (tmp)
584  mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
585  }
586 
587  /* always apply iconv-hooks to suit system's iconv tastes */
588  tocode2 = mutt_ch_iconv_lookup(tocode1);
589  tocode2 = tocode2 ? tocode2 : tocode1;
590  fromcode2 = mutt_ch_iconv_lookup(fromcode1);
591  fromcode2 = fromcode2 ? fromcode2 : fromcode1;
592 
593  /* call system iconv with names it appreciates */
594  cd = iconv_open(tocode2, fromcode2);
595  if (cd != (iconv_t) -1)
596  return cd;
597 
598  return (iconv_t) -1;
599 }
600 
617 size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
618  char **outbuf, size_t *outbytesleft, const char **inrepls,
619  const char *outrepl, int *iconverrno)
620 {
621  size_t rc = 0;
622  const char *ib = *inbuf;
623  size_t ibl = *inbytesleft;
624  char *ob = *outbuf;
625  size_t obl = *outbytesleft;
626 
627  while (true)
628  {
629  errno = 0;
630  const size_t ret1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
631  if (ret1 != (size_t) -1)
632  rc += ret1;
633  if (iconverrno)
634  *iconverrno = errno;
635 
636  if (ibl && obl && (errno == EILSEQ))
637  {
638  if (inrepls)
639  {
640  /* Try replacing the input */
641  const char **t = NULL;
642  for (t = inrepls; *t; t++)
643  {
644  const char *ib1 = *t;
645  size_t ibl1 = strlen(*t);
646  char *ob1 = ob;
647  size_t obl1 = obl;
648  iconv(cd, (ICONV_CONST char **) &ib1, &ibl1, &ob1, &obl1);
649  if (ibl1 == 0)
650  {
651  ib++;
652  ibl--;
653  ob = ob1;
654  obl = obl1;
655  rc++;
656  break;
657  }
658  }
659  if (*t)
660  continue;
661  }
662  /* Replace the output */
663  if (!outrepl)
664  outrepl = "?";
665  iconv(cd, NULL, NULL, &ob, &obl);
666  if (obl)
667  {
668  int n = strlen(outrepl);
669  if (n > obl)
670  {
671  outrepl = "?";
672  n = 1;
673  }
674  memcpy(ob, outrepl, n);
675  ib++;
676  ibl--;
677  ob += n;
678  obl -= n;
679  rc++;
680  iconv(cd, NULL, NULL, NULL, NULL); /* for good measure */
681  continue;
682  }
683  }
684  *inbuf = ib;
685  *inbytesleft = ibl;
686  *outbuf = ob;
687  *outbytesleft = obl;
688  return rc;
689  }
690 }
691 
701 const char *mutt_ch_iconv_lookup(const char *chs)
702 {
704 }
705 
716 int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
717 {
718  if (!s || !from || !to)
719  return -1;
720 
721  int rc = 0;
722  iconv_t cd = mutt_ch_iconv_open(to, from, MUTT_ICONV_NO_FLAGS);
723  if (cd == (iconv_t) -1)
724  return -1;
725 
726  size_t outlen = MB_LEN_MAX * slen;
727  char *out = mutt_mem_malloc(outlen + 1);
728  char *saved_out = out;
729 
730  const size_t convlen = iconv(cd, (ICONV_CONST char **) &s, &slen, &out, &outlen);
731  if (convlen == -1)
732  rc = errno;
733 
734  FREE(&saved_out);
735  iconv_close(cd);
736  return rc;
737 }
738 
/**
 * mutt_ch_convert_string - Convert a string between encodings
 * @param[in,out] ps    String to convert; replaced by the converted string
 * @param[in]     from  Current character set
 * @param[in]     to    Target character set
 * @param[in]     flags Flags passed to mutt_ch_iconv_open()
 * @retval 0   Success, or *ps is NULL or empty (nothing to do)
 * @retval -1  ps, from or to is NULL, the converter could not be opened, or
 *             the input is too long to size an output buffer
 * @retval num errno reported by mutt_ch_iconv()
 *
 * Unconvertible input is substituted: U+FFFD (as UTF-8 bytes) is written when
 * converting to UTF-8, U+FFFD then "?" are tried as replacement input when
 * converting from UTF-8, and "?" is written otherwise.
 */
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
{
  if (!ps)
    return -1;

  char *src = *ps;
  if (!src || (src[0] == '\0'))
    return 0;

  if (!from || !to)
    return -1;

  iconv_t cd = mutt_ch_iconv_open(to, from, flags);
  if (cd == (iconv_t) -1)
    return -1;

  /* Decide how invalid sequences get replaced */
  const char *utf8_repls[] = { "\357\277\275", "?", NULL };
  const char **in_subst = NULL;
  const char *out_subst = NULL;

  if (mutt_ch_is_utf8(to))
    out_subst = "\357\277\275";
  else if (mutt_ch_is_utf8(from))
    in_subst = utf8_repls;
  else
    out_subst = "?";

  size_t in_left = strlen(src);
  if (in_left >= (SIZE_MAX / MB_LEN_MAX))
  {
    /* MB_LEN_MAX * in_left below would overflow */
    iconv_close(cd);
    return -1;
  }

  size_t out_left = MB_LEN_MAX * in_left;
  char *dest = mutt_mem_malloc(out_left + 1);
  char *out_pos = dest;
  const char *in_pos = src;
  int conv_errno = 0;

  mutt_ch_iconv(cd, &in_pos, &in_left, &out_pos, &out_left, in_subst, out_subst, &conv_errno);
  iconv(cd, NULL, NULL, &out_pos, &out_left);
  iconv_close(cd);

  *out_pos = '\0';

  /* Hand the converted buffer to the caller in place of the original */
  FREE(ps);
  *ps = dest;
  mutt_str_adjust(ps);

  return conv_errno;
}
805 
817 bool mutt_ch_check_charset(const char *cs, bool strict)
818 {
819  if (!cs)
820  return false;
821 
822  if (mutt_ch_is_utf8(cs))
823  return true;
824 
825  if (!strict)
826  {
827  for (int i = 0; PreferredMimeNames[i].key; i++)
828  {
829  if (mutt_istr_equal(PreferredMimeNames[i].key, cs) ||
830  mutt_istr_equal(PreferredMimeNames[i].pref, cs))
831  {
832  return true;
833  }
834  }
835  }
836 
837  iconv_t cd = mutt_ch_iconv_open(cs, cs, MUTT_ICONV_NO_FLAGS);
838  if (cd != (iconv_t) (-1))
839  {
840  iconv_close(cd);
841  return true;
842  }
843 
844  return false;
845 }
846 
857 struct FgetConv *mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
858 {
859  struct FgetConv *fc = NULL;
860  iconv_t cd = (iconv_t) -1;
861 
862  if (from && to)
863  cd = mutt_ch_iconv_open(to, from, flags);
864 
865  if (cd != (iconv_t) -1)
866  {
867  static const char *repls[] = { "\357\277\275", "?", 0 };
868 
869  fc = mutt_mem_malloc(sizeof(struct FgetConv));
870  fc->p = fc->bufo;
871  fc->ob = fc->bufo;
872  fc->ib = fc->bufi;
873  fc->ibl = 0;
874  fc->inrepls = mutt_ch_is_utf8(to) ? repls : repls + 1;
875  }
876  else
877  fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
878  fc->fp = fp;
879  fc->cd = cd;
880  return fc;
881 }
882 
888 {
889  if (!fc || !*fc)
890  return;
891 
892  if ((*fc)->cd != (iconv_t) -1)
893  iconv_close((*fc)->cd);
894  FREE(fc);
895 }
896 
907 int mutt_ch_fgetconv(struct FgetConv *fc)
908 {
909  if (!fc)
910  return EOF;
911  if (fc->cd == (iconv_t) -1)
912  return fgetc(fc->fp);
913  if (!fc->p)
914  return EOF;
915  if (fc->p < fc->ob)
916  return (unsigned char) *(fc->p)++;
917 
918  /* Try to convert some more */
919  fc->p = fc->bufo;
920  fc->ob = fc->bufo;
921  if (fc->ibl)
922  {
923  size_t obl = sizeof(fc->bufo);
924  iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
925  if (fc->p < fc->ob)
926  return (unsigned char) *(fc->p)++;
927  }
928 
929  /* If we trusted iconv a bit more, we would at this point
930  * ask why it had stopped converting ... */
931 
932  /* Try to read some more */
933  if ((fc->ibl == sizeof(fc->bufi)) ||
934  (fc->ibl && (fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))))
935  {
936  fc->p = 0;
937  return EOF;
938  }
939  if (fc->ibl)
940  memcpy(fc->bufi, fc->ib, fc->ibl);
941  fc->ib = fc->bufi;
942  fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->fp);
943 
944  /* Try harder this time to convert some */
945  if (fc->ibl)
946  {
947  size_t obl = sizeof(fc->bufo);
948  mutt_ch_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl,
949  fc->inrepls, 0, NULL);
950  if (fc->p < fc->ob)
951  return (unsigned char) *(fc->p)++;
952  }
953 
954  /* Either the file has finished or one of the buffers is too small */
955  fc->p = 0;
956  return EOF;
957 }
958 
/**
 * mutt_ch_fgetconvs - Convert a file's charset into a string buffer
 * @param buf    Buffer for the result
 * @param buflen Length of the buffer
 * @param fc     FgetConv handle
 * @retval ptr  buf, holding at least one character
 * @retval NULL buf is NULL, buflen is 0, or nothing could be read
 *
 * Reads converted bytes until a newline has been stored, EOF is reached or
 * only room for the terminator is left.  The result is always
 * NUL-terminated, and includes the newline if one was read.
 */
char *mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
{
  /* A zero-length buffer has no room even for the terminating NUL */
  if (!buf || (buflen == 0))
    return NULL;

  size_t r;
  for (r = 0; (r + 1) < buflen;)
  {
    const int c = mutt_ch_fgetconv(fc);
    if (c == EOF)
      break;
    buf[r++] = (char) c;
    if (c == '\n')
      break;
  }
  buf[r] = '\0';

  if (r > 0)
    return buf;

  return NULL;
}
991 
1002 void mutt_ch_set_charset(const char *charset)
1003 {
1004  char buf[256];
1005 
1006  mutt_ch_canonical_charset(buf, sizeof(buf), charset);
1007 
1008  if (mutt_ch_is_utf8(buf))
1009  {
1010  CharsetIsUtf8 = true;
1011  ReplacementChar = 0xfffd; /* replacement character */
1012  }
1013  else
1014  {
1015  CharsetIsUtf8 = false;
1016  ReplacementChar = '?';
1017  }
1018 
1019 #if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1020  bind_textdomain_codeset(PACKAGE, buf);
1021 #endif
1022 }
1023 
1035 char *mutt_ch_choose(const char *fromcode, const struct Slist *charsets,
1036  const char *u, size_t ulen, char **d, size_t *dlen)
1037 {
1038  if (!fromcode || !charsets)
1039  return NULL;
1040 
1041  char *e = NULL, *tocode = NULL;
1042  size_t elen = 0, bestn = 0;
1043 
1044  const struct ListNode *np = NULL;
1045  STAILQ_FOREACH(np, &charsets->head, entries)
1046  {
1047  char *t = mutt_str_dup(np->data);
1048  if (!t)
1049  continue;
1050 
1051  size_t n = mutt_str_len(t);
1052  char *s = mutt_strn_dup(u, ulen);
1053  const int rc = d ? mutt_ch_convert_string(&s, fromcode, t, MUTT_ICONV_NO_FLAGS) :
1054  mutt_ch_check(s, ulen, fromcode, t);
1055  if (rc)
1056  {
1057  FREE(&t);
1058  FREE(&s);
1059  continue;
1060  }
1061  size_t slen = mutt_str_len(s);
1062 
1063  if (!tocode || (n < bestn))
1064  {
1065  bestn = n;
1066  FREE(&tocode);
1067  tocode = t;
1068  if (d)
1069  {
1070  FREE(&e);
1071  e = s;
1072  }
1073  else
1074  FREE(&s);
1075  elen = slen;
1076  }
1077  else
1078  {
1079  FREE(&t);
1080  FREE(&s);
1081  }
1082  }
1083  if (tocode)
1084  {
1085  if (d)
1086  *d = e;
1087  if (dlen)
1088  *dlen = elen;
1089 
1090  char canonical_buf[1024];
1091  mutt_ch_canonical_charset(canonical_buf, sizeof(canonical_buf), tocode);
1092  mutt_str_replace(&tocode, canonical_buf);
1093  }
1094  return tocode;
1095 }
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:268
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
Convenience wrapper for the config headers.
Convenience wrapper for the core headers.
static char * chs
Definition: gnupgparse.c:73
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:90
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
#define MIN(a, b)
Definition: memory.h:31
bool mutt_ch_check_charset(const char *cs, bool strict)
Does iconv understand a character set?
Definition: charset.c:817
void mutt_ch_fgetconv_close(struct FgetConv **fc)
Close an fgetconv handle.
Definition: charset.c:887
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
Definition: charset.c:617
void mutt_ch_lookup_remove(void)
Remove all the character set lookups.
Definition: charset.c:516
char * mutt_ch_get_langinfo_charset(void)
Get the user's choice of character set.
Definition: charset.c:461
static struct LookupList Lookups
Definition: charset.c:78
const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
Definition: charset.c:99
static struct Lookup * lookup_new(void)
Create a new Lookup.
Definition: charset.c:247
bool mutt_ch_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
Add a new character set lookup.
Definition: charset.c:484
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:351
TAILQ_HEAD(LookupList, Lookup)
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:752
struct FgetConv * mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
Prepare a file for charset conversion.
Definition: charset.c:857
void mutt_ch_set_charset(const char *charset)
Update the records for a new character set.
Definition: charset.c:1002
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:62
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
Definition: charset.c:716
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
Definition: charset.c:907
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
Definition: charset.c:537
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
Definition: charset.c:256
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:57
#define EILSEQ
Definition: charset.c:51
char * mutt_ch_get_default_charset(void)
Get the default character set.
Definition: charset.c:439
int mutt_ch_convert_nonmime_string(char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:307
char * mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
Convert a file's charset into a string buffer.
Definition: charset.c:969
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
Definition: charset.c:701
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
Are the names of two character sets equivalent?
Definition: charset.c:417
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
Definition: charset.c:281
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:564
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1035
Conversion between different character encodings.
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:72
#define mutt_ch_is_utf8(str)
Definition: charset.h:95
LookupType
Types of character set lookups.
Definition: charset.h:66
@ MUTT_LOOKUP_ICONV
Character set conversion.
Definition: charset.h:68
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
Definition: charset.h:67
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:71
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition: regex.c:631
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:796
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:428
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
Definition: string.c:370
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:544
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition: string.c:629
char * mutt_str_cat(char *buf, size_t buflen, const char *s)
Concatenate two strings.
Definition: string.c:265
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
Definition: string.c:326
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
Definition: string.c:239
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
Definition: string.c:501
static size_t plen
Length of cached packet.
Definition: pgppacket.c:39
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition: queue.h:735
#define STAILQ_FIRST(head)
Definition: queue.h:350
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:809
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
#define TAILQ_REMOVE(head, elm, field)
Definition: queue.h:841
#define TAILQ_HEAD_INITIALIZER(head)
Definition: queue.h:637
#define TAILQ_ENTRY(type)
Definition: queue.h:640
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition: regex3.h:53
Key value store.
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
A dummy converter.
Definition: charset.h:57
Cursor for converting a file's encoding.
Definition: charset.h:41
char bufi[512]
Definition: charset.h:44
iconv_t cd
Definition: charset.h:43
char bufo[512]
Definition: charset.h:45
size_t ibl
Definition: charset.h:49
FILE * fp
Definition: charset.h:42
char * p
Definition: charset.h:46
const char ** inrepls
Definition: charset.h:50
char * ib
Definition: charset.h:48
char * ob
Definition: charset.h:47
A List node for strings.
Definition: list.h:35
char * data
String.
Definition: list.h:36
Regex to String lookup table.
Definition: charset.c:70
char * replacement
Alternative charset to use.
Definition: charset.c:73
enum LookupType type
Lookup type.
Definition: charset.c:71
struct Regex regex
Regular expression.
Definition: charset.c:72
MIME name lookup entry.
Definition: charset.c:84
const char * key
Definition: charset.c:85
const char * pref
Definition: charset.c:86
Container for Accounts, Notifications.
Definition: neomutt.h:37
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:39
Cached regular expression.
Definition: regex3.h:89
char * pattern
printable version
Definition: regex3.h:90
bool pat_not
do not match
Definition: regex3.h:92
regex_t * regex
compiled expression
Definition: regex3.h:91
String list.
Definition: slist.h:47
struct ListHead head
List containing values.
Definition: slist.h:48
size_t count
Number of values in list.
Definition: slist.h:49