NeoMutt  2024-12-12-14-g7b49f7
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
content_info.c
Go to the documentation of this file.
1
30#include "config.h"
31#include <errno.h>
32#include <stdbool.h>
33#include <stdio.h>
34#include <string.h>
35#include <sys/stat.h>
36#include "mutt/lib.h"
37#include "config/lib.h"
38#include "email/lib.h"
39#include "core/lib.h"
40#include "lib.h"
41
49void mutt_update_content_info(struct Content *info, struct ContentState *s,
50 char *buf, size_t buflen)
51{
52 bool from = s->from;
53 int whitespace = s->whitespace;
54 bool dot = s->dot;
55 int linelen = s->linelen;
56 bool was_cr = s->was_cr;
57
58 if (!buf) /* This signals EOF */
59 {
60 if (was_cr)
61 info->binary = true;
62 if (linelen > info->linemax)
63 info->linemax = linelen;
64
65 return;
66 }
67
68 for (; buflen; buf++, buflen--)
69 {
70 char ch = *buf;
71
72 if (was_cr)
73 {
74 was_cr = false;
75 if (ch == '\n')
76 {
77 if (whitespace)
78 info->space = true;
79 if (dot)
80 info->dot = true;
81 if (linelen > info->linemax)
82 info->linemax = linelen;
83 whitespace = 0;
84 dot = false;
85 linelen = 0;
86 continue;
87 }
88
89 info->binary = true;
90 }
91
92 linelen++;
93 if (ch == '\n')
94 {
95 info->crlf++;
96 if (whitespace)
97 info->space = true;
98 if (dot)
99 info->dot = true;
100 if (linelen > info->linemax)
101 info->linemax = linelen;
102 whitespace = 0;
103 linelen = 0;
104 dot = false;
105 }
106 else if (ch == '\r')
107 {
108 info->crlf++;
109 info->cr = true;
110 was_cr = true;
111 continue;
112 }
113 else if (ch & 0x80)
114 {
115 info->hibin++;
116 }
117 else if ((ch == '\t') || (ch == '\f'))
118 {
119 info->ascii++;
120 whitespace++;
121 }
122 else if (ch == 0)
123 {
124 info->nulbin++;
125 info->lobin++;
126 }
127 else if ((ch < 32) || (ch == 127))
128 {
129 info->lobin++;
130 }
131 else
132 {
133 if (linelen == 1)
134 {
135 if ((ch == 'F') || (ch == 'f'))
136 from = true;
137 else
138 from = false;
139 if (ch == '.')
140 dot = true;
141 else
142 dot = false;
143 }
144 else if (from)
145 {
146 if ((linelen == 2) && (ch != 'r'))
147 {
148 from = false;
149 }
150 else if ((linelen == 3) && (ch != 'o'))
151 {
152 from = false;
153 }
154 else if (linelen == 4)
155 {
156 if (ch == 'm')
157 info->from = true;
158 from = false;
159 }
160 }
161 if (ch == ' ')
162 whitespace++;
163 info->ascii++;
164 }
165
166 if (linelen > 1)
167 dot = false;
168 if ((ch != ' ') && (ch != '\t'))
169 whitespace = 0;
170 }
171
172 s->from = from;
173 s->whitespace = whitespace;
174 s->dot = dot;
175 s->linelen = linelen;
176 s->was_cr = was_cr;
177}
178
188struct Content *mutt_get_content_info(const char *fname, struct Body *b,
189 struct ConfigSubset *sub)
190{
191 struct Content *info = NULL;
192 struct ContentState cstate = { 0 };
193 FILE *fp = NULL;
194 char *fromcode = NULL;
195 char *tocode = NULL;
196 char buf[100] = { 0 };
197 size_t r;
198
199 struct stat st = { 0 };
200
201 if (b && !fname)
202 fname = b->filename;
203 if (!fname)
204 return NULL;
205
206 if (stat(fname, &st) == -1)
207 {
208 mutt_error(_("Can't stat %s: %s"), fname, strerror(errno));
209 return NULL;
210 }
211
212 if (!S_ISREG(st.st_mode))
213 {
214 mutt_error(_("%s isn't a regular file"), fname);
215 return NULL;
216 }
217
218 fp = mutt_file_fopen(fname, "r");
219 if (!fp)
220 {
221 mutt_debug(LL_DEBUG1, "%s: %s (errno %d)\n", fname, strerror(errno), errno);
222 return NULL;
223 }
224
225 info = MUTT_MEM_CALLOC(1, struct Content);
226
227 const char *const c_charset = cc_charset();
228 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
229 {
230 const struct Slist *const c_attach_charset = cs_subset_slist(sub, "attach_charset");
231 const struct Slist *const c_send_charset = cs_subset_slist(sub, "send_charset");
232 struct Slist *c_charset_slist = slist_parse(c_charset, D_SLIST_SEP_COLON);
233
234 const struct Slist *fchs = b->use_disp ?
235 (c_attach_charset ? c_attach_charset : c_charset_slist) :
236 c_charset_slist;
237
238 struct Slist *chs = slist_parse(mutt_param_get(&b->parameter, "charset"), D_SLIST_SEP_COLON);
239
240 if (c_charset && (chs || c_send_charset) &&
241 (mutt_convert_file_from_to(fp, fchs, chs ? chs : c_send_charset, &fromcode,
242 &tocode, info) != ICONV_ILLEGAL_SEQ))
243 {
244 if (!chs)
245 {
246 char chsbuf[256] = { 0 };
247 mutt_ch_canonical_charset(chsbuf, sizeof(chsbuf), tocode);
248 mutt_param_set(&b->parameter, "charset", chsbuf);
249 }
250 FREE(&b->charset);
251 b->charset = mutt_str_dup(fromcode);
252 FREE(&tocode);
253 mutt_file_fclose(&fp);
254 slist_free(&c_charset_slist);
255 slist_free(&chs);
256 return info;
257 }
258
259 slist_free(&c_charset_slist);
260 slist_free(&chs);
261 }
262
263 rewind(fp);
264 while ((r = fread(buf, 1, sizeof(buf), fp)))
265 mutt_update_content_info(info, &cstate, buf, r);
266 mutt_update_content_info(info, &cstate, 0, 0);
267
268 mutt_file_fclose(&fp);
269
270 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
271 {
272 mutt_param_set(&b->parameter, "charset",
273 (!info->hibin ? "us-ascii" :
274 c_charset && !mutt_ch_is_us_ascii(c_charset) ? c_charset :
275 "unknown-8bit"));
276 }
277
278 return info;
279}
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:242
Convenience wrapper for the config headers.
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:116
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
Definition: content_info.c:49
struct Content * mutt_get_content_info(const char *fname, struct Body *b, struct ConfigSubset *sub)
Analyze file to determine MIME encoding to use.
Definition: content_info.c:188
size_t mutt_convert_file_from_to(FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
Convert a file between encodings.
Definition: convert.c:215
Convenience wrapper for the core headers.
Structs that make up an email.
#define mutt_file_fclose(FP)
Definition: file.h:138
#define mutt_file_fopen(PATH, MODE)
Definition: file.h:137
#define mutt_error(...)
Definition: logging2.h:92
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
@ LL_DEBUG1
Log at debug level 1.
Definition: logging2.h:43
#define FREE(x)
Definition: memory.h:55
#define MUTT_MEM_CALLOC(n, type)
Definition: memory.h:40
@ TYPE_TEXT
Type: 'text/*'.
Definition: mime.h:38
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:374
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:90
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:96
Convenience wrapper for the library headers.
#define _(a)
Definition: message.h:28
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:175
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:122
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
char * mutt_param_get(const struct ParameterList *pl, const char *s)
Find a matching Parameter.
Definition: parameter.c:85
void mutt_param_set(struct ParameterList *pl, const char *attribute, const char *value)
Set a Parameter.
Definition: parameter.c:111
Key value store.
The body of an email.
Definition: body.h:36
bool noconv
Don't do character set conversion.
Definition: body.h:46
char * charset
Send mode: charset of attached file as stored on disk.
Definition: body.h:79
struct ParameterList parameter
Parameters of the content-type.
Definition: body.h:63
bool use_disp
Content-Disposition uses filename= ?
Definition: body.h:47
bool force_charset
Send mode: don't adjust the character set when in send-mode.
Definition: body.h:44
unsigned int type
content-type primary type, ContentType
Definition: body.h:40
char * filename
When sending a message, this is the file to which this structure refers.
Definition: body.h:59
A set of inherited config items.
Definition: subset.h:47
Info about the body of an email.
Definition: content.h:56
bool was_cr
Was the last character CR?
Definition: content.h:61
int whitespace
Number of trailing whitespaces.
Definition: content.h:58
bool from
Is the current line a prefix of "From "?
Definition: content.h:57
int linelen
Length of the current line.
Definition: content.h:60
bool dot
Was the last character a dot?
Definition: content.h:59
Info about an attachment.
Definition: content.h:35
long crlf
\r and \n characters
Definition: content.h:39
long hibin
8-bit characters
Definition: content.h:36
bool cr
Has CR, even when in a CRLF pair.
Definition: content.h:46
bool space
Whitespace at the end of lines?
Definition: content.h:42
long ascii
Number of ascii chars.
Definition: content.h:40
bool binary
Long lines, or CR not in CRLF pair.
Definition: content.h:43
bool from
Has a line beginning with "From "?
Definition: content.h:44
long nulbin
Null characters (0x0)
Definition: content.h:38
long linemax
Length of the longest line in the file.
Definition: content.h:41
long lobin
Unprintable 7-bit chars (eg., control chars)
Definition: content.h:37
bool dot
Has a line consisting of a single dot?
Definition: content.h:45
String list.
Definition: slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition: types.h:112