NeoMutt  2024-10-02-7-gd3e66a
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
convert.c File Reference

Conversion between different character encodings. More...

#include "config.h"
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "mutt/lib.h"
#include "email/lib.h"
#include "lib.h"
+ Include dependency graph for convert.c:

Go to the source code of this file.

Functions

size_t mutt_convert_file_to (FILE *fp, const char *fromcode, struct Slist const *const tocodes, int *tocode, struct Content *info)
 Change the encoding of a file.
 
size_t mutt_convert_file_from_to (FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
 Convert a file between encodings.
 

Detailed Description

Conversion between different character encodings.

Authors
  • Michal Siedlaczek
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file convert.c.

Function Documentation

◆ mutt_convert_file_to()

size_t mutt_convert_file_to ( FILE *  fp,
const char *  fromcode,
struct Slist const *const  tocodes,
int *  tocode,
struct Content *  info 
)

Change the encoding of a file.

Parameters
[in] fp: File to convert
[in] fromcode: Original encoding
[in] tocodes: List of target encodings
[out] tocode: Index (within tocodes) of the chosen encoding
[out] info: Encoding information
Return values
-1: Error, no conversion was possible (returned as a size_t, i.e. ICONV_ILLEGAL_SEQ)
>=0: Success, number of characters converted inexactly

Find the best charset conversion of the file from fromcode into one of the tocodes. If successful, set *tocode and Content *info and return the number of characters converted inexactly.

We convert via UTF-8 in order to avoid the condition -1(EINVAL), which would otherwise prevent us from knowing the number of inexact conversions. Where the candidate target charset is UTF-8 we avoid doing the second conversion because iconv_open("UTF-8", "UTF-8") fails with some libraries.

We assume that the output from iconv is never more than 4 times as long as the input for any pair of charsets we might be interested in.

Definition at line 64 of file convert.c.

66{
67 char bufi[256] = { 0 };
68 char bufu[512] = { 0 };
69 char bufo[4 * sizeof(bufi)] = { 0 };
70 size_t rc;
71
72 const iconv_t cd1 = mutt_ch_iconv_open("utf-8", fromcode, MUTT_ICONV_NO_FLAGS);
73 if (!iconv_t_valid(cd1))
74 return -1;
75
76 int ncodes = tocodes->count;
77 iconv_t *cd = mutt_mem_calloc(ncodes, sizeof(iconv_t));
78 size_t *score = mutt_mem_calloc(ncodes, sizeof(size_t));
79 struct ContentState *states = mutt_mem_calloc(ncodes, sizeof(struct ContentState));
80 struct Content *infos = mutt_mem_calloc(ncodes, sizeof(struct Content));
81
82 struct ListNode *np = NULL;
83 int ni = 0;
84 STAILQ_FOREACH(np, &tocodes->head, entries)
85 {
86 if (!mutt_istr_equal(np->data, "utf-8"))
87 {
88 cd[ni] = mutt_ch_iconv_open(np->data, "utf-8", MUTT_ICONV_NO_FLAGS);
89 }
90 else
91 {
92 /* Special case for conversion to UTF-8 */
93 cd[ni] = ICONV_T_INVALID;
94 score[ni] = ICONV_ILLEGAL_SEQ;
95 }
96 ni += 1;
97 }
98
99 rewind(fp);
100 size_t ibl = 0;
101 while (true)
102 {
103 /* Try to fill input buffer */
104 size_t n = fread(bufi + ibl, 1, sizeof(bufi) - ibl, fp);
105 ibl += n;
106
107 /* Convert to UTF-8 */
108 const char *ib = bufi;
109 char *ob = bufu;
110 size_t obl = sizeof(bufu);
111 n = iconv(cd1, (ICONV_CONST char **) ((ibl != 0) ? &ib : 0), &ibl, &ob, &obl);
112 if ((n == ICONV_ILLEGAL_SEQ) && (((errno != EINVAL) && (errno != E2BIG)) || (ib == bufi)))
113 {
115 break;
116 }
117 const size_t ubl1 = ob - bufu;
118
119 /* Convert from UTF-8 */
120 for (int i = 0; i < ncodes; i++)
121 {
122 if (iconv_t_valid(cd[i]) && (score[i] != ICONV_ILLEGAL_SEQ))
123 {
124 const char *ub = bufu;
125 size_t ubl = ubl1;
126 ob = bufo;
127 obl = sizeof(bufo);
128 n = iconv(cd[i], (ICONV_CONST char **) ((ibl || ubl) ? &ub : 0), &ubl, &ob, &obl);
129 if (n == ICONV_ILLEGAL_SEQ)
130 {
131 score[i] = ICONV_ILLEGAL_SEQ;
132 }
133 else
134 {
135 score[i] += n;
136 mutt_update_content_info(&infos[i], &states[i], bufo, ob - bufo);
137 }
138 }
139 else if (!iconv_t_valid(cd[i]) && (score[i] == ICONV_ILLEGAL_SEQ))
140 {
141 /* Special case for conversion to UTF-8 */
142 mutt_update_content_info(&infos[i], &states[i], bufu, ubl1);
143 }
144 }
145
146 if (ibl)
147 {
148 /* Save unused input */
149 memmove(bufi, ib, ibl);
150 }
151 else if (!ubl1 && (ib < bufi + sizeof(bufi)))
152 {
153 rc = 0;
154 break;
155 }
156 }
157
158 if (rc == 0)
159 {
160 /* Find best score */
162 for (int i = 0; i < ncodes; i++)
163 {
164 if (!iconv_t_valid(cd[i]) && (score[i] == ICONV_ILLEGAL_SEQ))
165 {
166 /* Special case for conversion to UTF-8 */
167 *tocode = i;
168 rc = 0;
169 break;
170 }
171 else if (!iconv_t_valid(cd[i]) || (score[i] == ICONV_ILLEGAL_SEQ))
172 {
173 continue;
174 }
175 else if ((rc == ICONV_ILLEGAL_SEQ) || (score[i] < rc))
176 {
177 *tocode = i;
178 rc = score[i];
179 if (rc == 0)
180 break;
181 }
182 }
183 if (rc != ICONV_ILLEGAL_SEQ)
184 {
185 memcpy(info, &infos[*tocode], sizeof(struct Content));
186 mutt_update_content_info(info, &states[*tocode], 0, 0); /* EOF */
187 }
188 }
189
190 FREE(&cd);
191 FREE(&infos);
192 FREE(&score);
193 FREE(&states);
194
195 return rc;
196}
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
Definition: content_info.c:49
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:51
#define FREE(x)
Definition: memory.h:45
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:594
#define ICONV_T_INVALID
Error value for iconv functions.
Definition: charset.h:102
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:73
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:105
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition: charset.h:114
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:672
#define STAILQ_FOREACH(var, head, field)
Definition: queue.h:352
Info about the body of an email.
Definition: content.h:57
Info about an attachment.
Definition: content.h:36
A List node for strings.
Definition: list.h:37
char * data
String.
Definition: list.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_convert_file_from_to()

size_t mutt_convert_file_from_to ( FILE *  fp,
const struct Slist *  fromcodes,
const struct Slist *  tocodes,
char **  fromcode,
char **  tocode,
struct Content *  info 
)

Convert a file between encodings.

Parameters
[in] fp: File to read from
[in] fromcodes: Charsets to try converting FROM
[in] tocodes: Charsets to try converting TO
[out] fromcode: From charset selected
[out] tocode: To charset selected
[out] info: Info about the file
Return values
num: Characters converted inexactly
ICONV_ILLEGAL_SEQ: Error (as a size_t)

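Find the first of the fromcodes that gives a valid conversion and the best charset conversion of the file into one of the tocodes. If successful, set *fromcode to the matching string inside fromcodes (it is not copied, so the caller must not free it), set *tocode to a dynamically allocated string (the caller must free it), set Content *info, and return the number of characters converted inexactly. If no conversion was possible, return -1 (ICONV_ILLEGAL_SEQ) and leave *fromcode and *tocode untouched.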

Definition at line 215 of file convert.c.

218{
219 char **tcode = NULL;
220 size_t rc;
221 int cn;
222 struct ListNode *np = NULL;
223
224 /* Copy them */
225 tcode = mutt_mem_calloc(tocodes->count, sizeof(char *));
226 np = NULL;
227 cn = 0;
228 STAILQ_FOREACH(np, &tocodes->head, entries)
229 {
230 tcode[cn++] = mutt_str_dup(np->data);
231 }
232
234 np = NULL;
235 cn = 0;
236 STAILQ_FOREACH(np, &fromcodes->head, entries)
237 {
238 /* Try each fromcode in turn */
239 rc = mutt_convert_file_to(fp, np->data, tocodes, &cn, info);
240 if (rc != ICONV_ILLEGAL_SEQ)
241 {
242 *fromcode = np->data;
243 *tocode = tcode[cn];
244 tcode[cn] = 0;
245 break;
246 }
247 }
248
249 /* Free memory */
250 for (cn = 0; cn < tocodes->count; cn++)
251 FREE(&tcode[cn]);
252
253 FREE(&tcode);
254
255 return rc;
256}
size_t mutt_convert_file_to(FILE *fp, const char *fromcode, struct Slist const *const tocodes, int *tocode, struct Content *info)
Change the encoding of a file.
Definition: convert.c:64
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
struct ListHead head
List containing values.
Definition: slist.h:38
size_t count
Number of values in list.
Definition: slist.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function: