NeoMutt
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
content_info.c File Reference

Extracting content info from email body. More...

#include "config.h"
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include "mutt/lib.h"
#include "config/lib.h"
#include "email/lib.h"
#include "core/lib.h"
#include "lib.h"
+ Include dependency graph for content_info.c:

Go to the source code of this file.

Functions

void mutt_update_content_info (struct Content *info, struct ContentState *s, char *buf, size_t buflen)
 Cache some info about an email.
 
struct Content * mutt_get_content_info (const char *fname, struct Body *b, struct ConfigSubset *sub)
 Analyze file to determine MIME encoding to use.
 

Detailed Description

Extracting content info from email body.

Authors
  • Michal Siedlaczek

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file content_info.c.

Function Documentation

◆ mutt_update_content_info()

void mutt_update_content_info ( struct Content *  info,
struct ContentState *  s,
char *  buf,
size_t  buflen 
)

Cache some info about an email.

Parameters
info: Info about an Attachment
s: Info about the Body of an email
buf: Buffer of data to examine (NULL signals EOF)
buflen: Number of bytes in buf

Definition at line 48 of file content_info.c.

/* Body of mutt_update_content_info(): examine buflen bytes of buf and update
 * the counters and flags in info.  The per-line scanner state is loaded from s
 * on entry and stored back on exit, so the data can be fed in several chunks.
 * A NULL buf marks the end of the input. */
50{
/* Work on local copies of the saved scanner state */
51 bool from = s->from;
52 int whitespace = s->whitespace;
53 bool dot = s->dot;
54 int linelen = s->linelen;
55 bool was_cr = s->was_cr;
56
57 if (!buf) /* This signals EOF */
58 {
/* The input ended on a CR that was never followed by LF */
59 if (was_cr)
60 info->binary = true;
/* Account for a final line that has no line ending */
61 if (linelen > info->linemax)
62 info->linemax = linelen;
63
/* Note: the state in s is not written back on this path */
64 return;
65 }
66
67 for (; buflen; buf++, buflen--)
68 {
69 char ch = *buf;
70
/* The previous byte was CR: see whether this byte completes a CRLF pair */
71 if (was_cr)
72 {
73 was_cr = false;
74 if (ch == '\n')
75 {
/* CRLF ends the line; the LF is not added to linelen or info->crlf
 * (the CR already was) */
76 if (whitespace)
77 info->space = true;
78 if (dot)
79 info->dot = true;
80 if (linelen > info->linemax)
81 info->linemax = linelen;
82 whitespace = 0;
83 dot = false;
84 linelen = 0;
85 continue;
86 }
87
/* CR followed by something other than LF */
88 info->binary = true;
89 }
90
91 linelen++;
92 if (ch == '\n')
93 {
/* Bare LF: end of line; linelen (which includes the LF) is compared
 * against the longest line seen so far */
94 info->crlf++;
95 if (whitespace)
96 info->space = true;
97 if (dot)
98 info->dot = true;
99 if (linelen > info->linemax)
100 info->linemax = linelen;
101 whitespace = 0;
102 linelen = 0;
103 dot = false;
104 }
105 else if (ch == '\r')
106 {
/* CR: count it now; the next byte (or EOF) decides whether it was part
 * of a CRLF pair.  The 'continue' skips the dot/whitespace resets below. */
107 info->crlf++;
108 info->cr = true;
109 was_cr = true;
110 continue;
111 }
112 else if (ch & 0x80)
113 {
/* Byte with the high bit set */
114 info->hibin++;
115 }
116 else if ((ch == '\t') || (ch == '\f'))
117 {
/* Tab and form-feed are counted as ascii and as whitespace; for a
 * form-feed the whitespace count is reset again at the end of the loop */
118 info->ascii++;
119 whitespace++;
120 }
121 else if (ch == 0)
122 {
/* NUL is counted both as a null byte and as a low control character */
123 info->nulbin++;
124 info->lobin++;
125 }
126 else if ((ch < 32) || (ch == 127))
127 {
/* Any other control character, or DEL */
128 info->lobin++;
129 }
130 else
131 {
/* Printable 7-bit character: track a leading "From" and a leading dot */
132 if (linelen == 1)
133 {
/* First character of the line: start (or abandon) both matches */
134 if ((ch == 'F') || (ch == 'f'))
135 from = true;
136 else
137 from = false;
138 if (ch == '.')
139 dot = true;
140 else
141 dot = false;
142 }
143 else if (from)
144 {
/* Characters 2-4 must be 'r', 'o', 'm'.  info->from is set as soon as
 * the 4th character matches; no following space is checked here. */
145 if ((linelen == 2) && (ch != 'r'))
146 {
147 from = false;
148 }
149 else if ((linelen == 3) && (ch != 'o'))
150 {
151 from = false;
152 }
153 else if (linelen == 4)
154 {
155 if (ch == 'm')
156 info->from = true;
157 from = false;
158 }
159 }
160 if (ch == ' ')
161 whitespace++;
162 info->ascii++;
163 }
164
/* The dot flag only survives while the dot is the sole character on the line */
165 if (linelen > 1)
166 dot = false;
/* Any byte other than space or tab ends the run of trailing whitespace */
167 if ((ch != ' ') && (ch != '\t'))
168 whitespace = 0;
169 }
170
/* Save the scanner state for the next chunk */
171 s->from = from;
172 s->whitespace = whitespace;
173 s->dot = dot;
174 s->linelen = linelen;
175 s->was_cr = was_cr;
176}
bool was_cr
Was the last character CR?
Definition: content.h:61
int whitespace
Number of trailing whitespaces.
Definition: content.h:58
bool from
Is the current line a prefix of "From "?
Definition: content.h:57
int linelen
Length of the current line.
Definition: content.h:60
bool dot
Was the last character a dot?
Definition: content.h:59
long crlf
\r and \n characters
Definition: content.h:39
long hibin
8-bit characters
Definition: content.h:36
bool cr
Has CR, even when in a CRLF pair.
Definition: content.h:46
bool space
Whitespace at the end of lines?
Definition: content.h:42
long ascii
Number of ascii chars.
Definition: content.h:40
bool binary
Long lines, or CR not in CRLF pair.
Definition: content.h:43
bool from
Has a line beginning with "From "?
Definition: content.h:44
long nulbin
Null characters (0x0)
Definition: content.h:38
long linemax
Length of the longest line in the file.
Definition: content.h:41
long lobin
Unprintable 7-bit chars (eg., control chars)
Definition: content.h:37
bool dot
Has a line consisting of a single dot?
Definition: content.h:45
+ Here is the caller graph for this function:

◆ mutt_get_content_info()

struct Content * mutt_get_content_info ( const char *  fname,
struct Body *  b,
struct ConfigSubset *  sub 
)

Analyze file to determine MIME encoding to use.

Parameters
fname: File to examine
b: Body to update
sub: Config Subset
Return values
ptr: Newly allocated Content

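May also update the Body's charset: for a text Body with neither noconv nor force_charset set, the "charset" Content-Type parameter may be set and, after a successful conversion, b->charset is replaced.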

Definition at line 187 of file content_info.c.

/* Body of mutt_get_content_info(): open the file, gather statistics about its
 * contents into a newly allocated struct Content and return it.  For a text
 * Body that allows conversion, a charset conversion is tried first and the
 * Body's charset information is updated.  Returns NULL if there is no
 * filename, or the file can't be stat'ed, isn't a regular file or can't be
 * opened. */
189{
190 struct Content *info = NULL;
191 struct ContentState cstate = { 0 };
192 FILE *fp = NULL;
193 char *fromcode = NULL;
194 char *tocode = NULL;
195 char buf[100] = { 0 };
196 size_t r;
197
198 struct stat st = { 0 };
199
/* Fall back to the Body's filename when none was given */
200 if (b && !fname)
201 fname = b->filename;
202 if (!fname)
203 return NULL;
204
205 if (stat(fname, &st) == -1)
206 {
207 mutt_error(_("Can't stat %s: %s"), fname, strerror(errno));
208 return NULL;
209 }
210
/* Only regular files are examined */
211 if (!S_ISREG(st.st_mode))
212 {
213 mutt_error(_("%s isn't a regular file"), fname);
214 return NULL;
215 }
216
217 fp = fopen(fname, "r");
218 if (!fp)
219 {
/* Unlike the failures above, this one is only logged at debug level */
220 mutt_debug(LL_DEBUG1, "%s: %s (errno %d)\n", fname, strerror(errno), errno);
221 return NULL;
222 }
223
/* From here on, every path returns info to the caller */
224 info = mutt_mem_calloc(1, sizeof(struct Content));
225
226 const char *const c_charset = cc_charset();
/* Text Body with conversion allowed: try converting between charsets */
227 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
228 {
229 const struct Slist *const c_attach_charset = cs_subset_slist(sub, "attach_charset");
230 const struct Slist *const c_send_charset = cs_subset_slist(sub, "send_charset");
231 struct Slist *c_charset_slist = slist_parse(c_charset, SLIST_SEP_COLON);
232
/* Candidate source charsets: $attach_charset when use_disp is set and the
 * option has a value, otherwise $charset */
233 const struct Slist *fchs = b->use_disp ?
234 (c_attach_charset ? c_attach_charset : c_charset_slist) :
235 c_charset_slist;
236
/* Candidate target charsets: the Body's existing "charset" parameter, if any;
 * otherwise $send_charset is used in the call below */
237 struct Slist *chs = slist_parse(mutt_param_get(&b->parameter, "charset"), SLIST_SEP_COLON);
238
239 if (c_charset && (chs || c_send_charset) &&
240 (mutt_convert_file_from_to(fp, fchs, chs ? chs : c_send_charset, &fromcode,
241 &tocode, info) != ICONV_ILLEGAL_SEQ))
242 {
/* The Body had no "charset" parameter: record the chosen target charset */
243 if (!chs)
244 {
245 char chsbuf[256] = { 0 };
246 mutt_ch_canonical_charset(chsbuf, sizeof(chsbuf), tocode);
247 mutt_param_set(&b->parameter, "charset", chsbuf);
248 }
/* NOTE(review): fromcode is copied and never freed here, while tocode is
 * freed - presumably only tocode is owned by this function; confirm against
 * mutt_convert_file_from_to() */
249 FREE(&b->charset);
250 b->charset = mutt_str_dup(fromcode);
251 FREE(&tocode);
252 mutt_file_fclose(&fp);
253 slist_free(&c_charset_slist);
254 slist_free(&chs);
/* info was passed to mutt_convert_file_from_to(); presumably it was filled in there */
255 return info;
256 }
257
258 slist_free(&c_charset_slist);
259 slist_free(&chs);
260 }
261
/* No conversion was done: scan the file from the start, up to sizeof(buf)
 * bytes at a time, then make a final call with a NULL buffer to signal EOF */
262 rewind(fp);
263 while ((r = fread(buf, 1, sizeof(buf), fp)))
264 mutt_update_content_info(info, &cstate, buf, r);
265 mutt_update_content_info(info, &cstate, 0, 0);
266
267 mutt_file_fclose(&fp);
268
/* Label the text: "us-ascii" if no 8-bit bytes were seen, else $charset when
 * it is set and isn't us-ascii, else "unknown-8bit" */
269 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
270 {
271 mutt_param_set(&b->parameter, "charset",
272 (!info->hibin ? "us-ascii" :
273 c_charset && !mutt_ch_is_us_ascii(c_charset) ? c_charset :
274 "unknown-8bit"));
275 }
276
277 return info;
278}
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:243
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:115
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
Definition: content_info.c:48
size_t mutt_convert_file_from_to(FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
Convert a file between encodings.
Definition: convert.c:212
int mutt_file_fclose(FILE **fp)
Close a FILE handle (and NULL the pointer)
Definition: file.c:152
#define mutt_error(...)
Definition: logging2.h:92
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
@ LL_DEBUG1
Log at debug level 1.
Definition: logging2.h:43
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:45
@ TYPE_TEXT
Type: 'text/*'.
Definition: mime.h:38
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:371
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:97
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:103
#define _(a)
Definition: message.h:28
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:215
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:162
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:251
char * mutt_param_get(const struct ParameterList *pl, const char *s)
Find a matching Parameter.
Definition: parameter.c:84
void mutt_param_set(struct ParameterList *pl, const char *attribute, const char *value)
Set a Parameter.
Definition: parameter.c:110
#define SLIST_SEP_COLON
Definition: slist.h:35
bool noconv
Don't do character set conversion.
Definition: body.h:46
char * charset
Send mode: charset of attached file as stored on disk.
Definition: body.h:78
struct ParameterList parameter
Parameters of the content-type.
Definition: body.h:62
bool use_disp
Content-Disposition uses filename= ?
Definition: body.h:47
bool force_charset
Send mode: don't adjust the character set when in send-mode.
Definition: body.h:44
unsigned int type
content-type primary type, ContentType
Definition: body.h:40
char * filename
When sending a message, this is the file to which this structure refers.
Definition: body.h:58
Info about the body of an email.
Definition: content.h:56
Info about an attachment.
Definition: content.h:35
String list.
Definition: slist.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function: