NeoMutt  2023-03-22-27-g3cb248
Teaching an old dog new tricks
DOXYGEN
content_info.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <errno.h>
31#include <stdbool.h>
32#include <stdio.h>
33#include <string.h>
34#include <sys/stat.h>
35#include "mutt/lib.h"
36#include "config/helpers.h"
37#include "email/lib.h"
38#include "convert/lib.h"
39
40struct ConfigSubset;
41
49void mutt_update_content_info(struct Content *info, struct ContentState *s,
50 char *buf, size_t buflen)
51{
52 bool from = s->from;
53 int whitespace = s->whitespace;
54 bool dot = s->dot;
55 int linelen = s->linelen;
56 bool was_cr = s->was_cr;
57
58 if (!buf) /* This signals EOF */
59 {
60 if (was_cr)
61 info->binary = true;
62 if (linelen > info->linemax)
63 info->linemax = linelen;
64
65 return;
66 }
67
68 for (; buflen; buf++, buflen--)
69 {
70 char ch = *buf;
71
72 if (was_cr)
73 {
74 was_cr = false;
75 if (ch == '\n')
76 {
77 if (whitespace)
78 info->space = true;
79 if (dot)
80 info->dot = true;
81 if (linelen > info->linemax)
82 info->linemax = linelen;
83 whitespace = 0;
84 dot = false;
85 linelen = 0;
86 continue;
87 }
88
89 info->binary = true;
90 }
91
92 linelen++;
93 if (ch == '\n')
94 {
95 info->crlf++;
96 if (whitespace)
97 info->space = true;
98 if (dot)
99 info->dot = true;
100 if (linelen > info->linemax)
101 info->linemax = linelen;
102 whitespace = 0;
103 linelen = 0;
104 dot = false;
105 }
106 else if (ch == '\r')
107 {
108 info->crlf++;
109 info->cr = true;
110 was_cr = true;
111 continue;
112 }
113 else if (ch & 0x80)
114 info->hibin++;
115 else if ((ch == '\t') || (ch == '\f'))
116 {
117 info->ascii++;
118 whitespace++;
119 }
120 else if (ch == 0)
121 {
122 info->nulbin++;
123 info->lobin++;
124 }
125 else if ((ch < 32) || (ch == 127))
126 info->lobin++;
127 else
128 {
129 if (linelen == 1)
130 {
131 if ((ch == 'F') || (ch == 'f'))
132 from = true;
133 else
134 from = false;
135 if (ch == '.')
136 dot = true;
137 else
138 dot = false;
139 }
140 else if (from)
141 {
142 if ((linelen == 2) && (ch != 'r'))
143 from = false;
144 else if ((linelen == 3) && (ch != 'o'))
145 from = false;
146 else if (linelen == 4)
147 {
148 if (ch == 'm')
149 info->from = true;
150 from = false;
151 }
152 }
153 if (ch == ' ')
154 whitespace++;
155 info->ascii++;
156 }
157
158 if (linelen > 1)
159 dot = false;
160 if ((ch != ' ') && (ch != '\t'))
161 whitespace = 0;
162 }
163
164 s->from = from;
165 s->whitespace = whitespace;
166 s->dot = dot;
167 s->linelen = linelen;
168 s->was_cr = was_cr;
169}
170
180struct Content *mutt_get_content_info(const char *fname, struct Body *b,
181 struct ConfigSubset *sub)
182{
183 struct Content *info = NULL;
184 struct ContentState cstate = { 0 };
185 FILE *fp = NULL;
186 char *fromcode = NULL;
187 char *tocode = NULL;
188 char buf[100] = { 0 };
189 size_t r;
190
191 struct stat st = { 0 };
192
193 if (b && !fname)
194 fname = b->filename;
195 if (!fname)
196 return NULL;
197
198 if (stat(fname, &st) == -1)
199 {
200 mutt_error(_("Can't stat %s: %s"), fname, strerror(errno));
201 return NULL;
202 }
203
204 if (!S_ISREG(st.st_mode))
205 {
206 mutt_error(_("%s isn't a regular file"), fname);
207 return NULL;
208 }
209
210 fp = fopen(fname, "r");
211 if (!fp)
212 {
213 mutt_debug(LL_DEBUG1, "%s: %s (errno %d)\n", fname, strerror(errno), errno);
214 return NULL;
215 }
216
217 info = mutt_mem_calloc(1, sizeof(struct Content));
218
219 const char *const c_charset = cs_subset_string(sub, "charset");
220
221 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
222 {
223 const struct Slist *const c_attach_charset = cs_subset_slist(sub, "attach_charset");
224 const struct Slist *const c_send_charset = cs_subset_slist(sub, "send_charset");
225 struct Slist *c_charset_slist = slist_parse(c_charset, SLIST_SEP_COLON);
226
227 const struct Slist *fchs = b->use_disp ?
228 (c_attach_charset ? c_attach_charset : c_charset_slist) :
229 c_charset_slist;
230
231 struct Slist *chs = slist_parse(mutt_param_get(&b->parameter, "charset"), SLIST_SEP_COLON);
232
233 if (c_charset && (chs || c_send_charset) &&
234 (mutt_convert_file_from_to(fp, fchs, chs ? chs : c_send_charset,
235 &fromcode, &tocode, info) != (size_t) (-1)))
236 {
237 if (!chs)
238 {
239 char chsbuf[256] = { 0 };
240 mutt_ch_canonical_charset(chsbuf, sizeof(chsbuf), tocode);
241 mutt_param_set(&b->parameter, "charset", chsbuf);
242 }
243 FREE(&b->charset);
244 b->charset = mutt_str_dup(fromcode);
245 FREE(&tocode);
246 mutt_file_fclose(&fp);
247 slist_free(&c_charset_slist);
248 slist_free(&chs);
249 return info;
250 }
251
252 slist_free(&c_charset_slist);
253 slist_free(&chs);
254 }
255
256 rewind(fp);
257 while ((r = fread(buf, 1, sizeof(buf), fp)))
258 mutt_update_content_info(info, &cstate, buf, r);
259 mutt_update_content_info(info, &cstate, 0, 0);
260
261 mutt_file_fclose(&fp);
262
263 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
264 {
265 mutt_param_set(&b->parameter, "charset",
266 (!info->hibin ? "us-ascii" :
267 c_charset && !mutt_ch_is_us_ascii(c_charset) ? c_charset :
268 "unknown-8bit"));
269 }
270
271 return info;
272}
const char * cs_subset_string(const struct ConfigSubset *sub, const char *name)
Get a string config item by name.
Definition: helpers.c:317
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:268
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
Definition: content_info.c:49
struct Content * mutt_get_content_info(const char *fname, struct Body *b, struct ConfigSubset *sub)
Analyze file to determine MIME encoding to use.
Definition: content_info.c:180
Conversion between different character encodings.
size_t mutt_convert_file_from_to(FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
Convert a file between encodings.
Definition: convert.c:218
Structs that make up an email.
int mutt_file_fclose(FILE **fp)
Close a FILE handle (and NULL the pointer)
Definition: file.c:149
static char * chs
Definition: gnupgparse.c:73
#define mutt_error(...)
Definition: logging.h:87
#define mutt_debug(LEVEL,...)
Definition: logging.h:84
Helper functions to get config values.
@ LL_DEBUG1
Log at debug level 1.
Definition: logging.h:40
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:43
@ TYPE_TEXT
Type: 'text/*'.
Definition: mime.h:38
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:350
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:96
Convenience wrapper for the library headers.
#define _(a)
Definition: message.h:28
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:200
void slist_free(struct Slist **list)
Free an Slist object.
Definition: slist.c:162
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:250
char * mutt_param_get(const struct ParameterList *pl, const char *s)
Find a matching Parameter.
Definition: parameter.c:84
void mutt_param_set(struct ParameterList *pl, const char *attribute, const char *value)
Set a Parameter.
Definition: parameter.c:110
#define SLIST_SEP_COLON
Definition: slist.h:35
The body of an email.
Definition: body.h:36
bool noconv
Don't do character set conversion.
Definition: body.h:46
char * charset
Send mode: charset of attached file as stored on disk.
Definition: body.h:78
struct ParameterList parameter
Parameters of the content-type.
Definition: body.h:62
bool use_disp
Content-Disposition uses filename= ?
Definition: body.h:47
bool force_charset
Send mode: don't adjust the character set when in send-mode.
Definition: body.h:44
unsigned int type
content-type primary type, ContentType
Definition: body.h:40
char * filename
When sending a message, this is the file to which this structure refers.
Definition: body.h:58
A set of inherited config items.
Definition: subset.h:47
Info about the body of an email.
Definition: content.h:56
bool was_cr
Was the last character CR?
Definition: content.h:61
int whitespace
Number of trailing whitespace characters.
Definition: content.h:58
bool from
Is the current line a prefix of "From "?
Definition: content.h:57
int linelen
Length of the current line.
Definition: content.h:60
bool dot
Was the last character a dot?
Definition: content.h:59
Info about an attachment.
Definition: content.h:35
long crlf
\r and \n characters
Definition: content.h:39
long hibin
8-bit characters
Definition: content.h:36
bool cr
Has CR, even when in a CRLF pair.
Definition: content.h:46
bool space
Whitespace at the end of lines?
Definition: content.h:42
long ascii
Number of ascii chars.
Definition: content.h:40
bool binary
Long lines, or CR not in CRLF pair.
Definition: content.h:43
bool from
Has a line beginning with "From "?
Definition: content.h:44
long nulbin
Null characters (0x0)
Definition: content.h:38
long linemax
Length of the longest line in the file.
Definition: content.h:41
long lobin
Unprintable 7-bit chars (eg., control chars)
Definition: content.h:37
bool dot
Has a line consisting of a single dot?
Definition: content.h:45
String list.
Definition: slist.h:47