NeoMutt  2024-11-14-34-g5aaf0d
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
content_info.c File Reference

Extracting content info from email body. More...

#include "config.h"
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include "mutt/lib.h"
#include "config/lib.h"
#include "email/lib.h"
#include "core/lib.h"
#include "lib.h"
+ Include dependency graph for content_info.c:

Go to the source code of this file.

Functions

void mutt_update_content_info (struct Content *info, struct ContentState *s, char *buf, size_t buflen)
 Cache some info about an email.
 
struct Content * mutt_get_content_info (const char *fname, struct Body *b, struct ConfigSubset *sub)
 Analyze file to determine MIME encoding to use.
 

Detailed Description

Extracting content info from email body.

Authors
  • Michal Siedlaczek
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file content_info.c.

Function Documentation

◆ mutt_update_content_info()

void mutt_update_content_info ( struct Content *  info,
struct ContentState *  s,
char *  buf,
size_t  buflen 
)

Cache some info about an email.

Parameters
info: Info about an Attachment
s: Info about the Body of an email
buf: Buffer for the result
buflen: Length of the buffer

Definition at line 49 of file content_info.c.

/* Body of mutt_update_content_info().
 *
 * Scans buflen bytes of buf one at a time and updates the counters and flags
 * in info.  buf is only read here.  The per-line scanner state (from,
 * whitespace, dot, linelen, was_cr) is loaded from s on entry and written back
 * on exit, so consecutive calls on successive chunks continue mid-line.
 * A NULL buf marks the end of the input: only the end-of-input checks run and
 * s is not written back. */
51{
/* Work on local copies of the carried-over state */
52 bool from = s->from;
53 int whitespace = s->whitespace;
54 bool dot = s->dot;
55 int linelen = s->linelen;
56 bool was_cr = s->was_cr;
57
58 if (!buf) /* This signals EOF */
59 {
/* A CR still pending at EOF was never followed by LF */
60 if (was_cr)
61 info->binary = true;
/* Account for a final line that had no terminator */
62 if (linelen > info->linemax)
63 info->linemax = linelen;
64
65 return;
66 }
67
68 for (; buflen; buf++, buflen--)
69 {
70 char ch = *buf;
71
/* The previous byte was a CR: decide whether it was part of a CRLF pair */
72 if (was_cr)
73 {
74 was_cr = false;
75 if (ch == '\n')
76 {
/* CRLF ends the line.  The LF is neither added to linelen nor to
 * info->crlf (the CR was already counted in both). */
77 if (whitespace)
78 info->space = true;
79 if (dot)
80 info->dot = true;
81 if (linelen > info->linemax)
82 info->linemax = linelen;
83 whitespace = 0;
84 dot = false;
85 linelen = 0;
86 continue;
87 }
88
/* CR followed by something other than LF */
89 info->binary = true;
90 }
91
/* Count this byte towards the current line; this happens before the
 * terminator checks, so a bare LF or a CR is included in the length */
92 linelen++;
93 if (ch == '\n')
94 {
/* Bare LF ends the line */
95 info->crlf++;
96 if (whitespace)
97 info->space = true;
98 if (dot)
99 info->dot = true;
100 if (linelen > info->linemax)
101 info->linemax = linelen;
102 whitespace = 0;
103 linelen = 0;
104 dot = false;
105 }
106 else if (ch == '\r')
107 {
108 info->crlf++;
109 info->cr = true;
110 was_cr = true;
/* Skip the resets at the bottom of the loop so that dot and whitespace
 * survive until the byte after the CR has been examined */
111 continue;
112 }
113 else if (ch & 0x80)
114 {
/* High bit set */
115 info->hibin++;
116 }
117 else if ((ch == '\t') || (ch == '\f'))
118 {
/* Counted as ascii.  For '\f' the check at the bottom of the loop resets
 * whitespace to 0 again, because only ' ' and '\t' are exempt there. */
119 info->ascii++;
120 whitespace++;
121 }
122 else if (ch == 0)
123 {
/* NUL is counted both as a NUL and as a low control character */
124 info->nulbin++;
125 info->lobin++;
126 }
127 else if ((ch < 32) || (ch == 127))
128 {
/* Remaining control characters and DEL */
129 info->lobin++;
130 }
131 else
132 {
/* Remaining 7-bit characters (32..126) */
133 if (linelen == 1)
134 {
/* First character of the line: start tracking "From" and a lone '.' */
135 if ((ch == 'F') || (ch == 'f'))
136 from = true;
137 else
138 from = false;
139 if (ch == '.')
140 dot = true;
141 else
142 dot = false;
143 }
144 else if (from)
145 {
/* Characters 2..4 must be 'r', 'o', 'm' (lower case only).  Only these
 * four letters are checked; no following character is examined. */
146 if ((linelen == 2) && (ch != 'r'))
147 {
148 from = false;
149 }
150 else if ((linelen == 3) && (ch != 'o'))
151 {
152 from = false;
153 }
154 else if (linelen == 4)
155 {
156 if (ch == 'm')
157 info->from = true;
158 from = false;
159 }
160 }
161 if (ch == ' ')
162 whitespace++;
163 info->ascii++;
164 }
165
/* dot stays set only while the line consists of a single '.' */
166 if (linelen > 1)
167 dot = false;
/* Any byte other than space or tab ends the current whitespace run */
168 if ((ch != ' ') && (ch != '\t'))
169 whitespace = 0;
170 }
171
/* Save the state for the next chunk */
172 s->from = from;
173 s->whitespace = whitespace;
174 s->dot = dot;
175 s->linelen = linelen;
176 s->was_cr = was_cr;
177}
bool was_cr
Was the last character CR?
Definition: content.h:61
int whitespace
Number of trailing whitespaces.
Definition: content.h:58
bool from
Is the current line a prefix of "From "?
Definition: content.h:57
int linelen
Length of the current line.
Definition: content.h:60
bool dot
Was the last character a dot?
Definition: content.h:59
long crlf
\r and \n characters
Definition: content.h:39
long hibin
8-bit characters
Definition: content.h:36
bool cr
Has CR, even when in a CRLF pair.
Definition: content.h:46
bool space
Whitespace at the end of lines?
Definition: content.h:42
long ascii
Number of ascii chars.
Definition: content.h:40
bool binary
Long lines, or CR not in CRLF pair.
Definition: content.h:43
bool from
Has a line beginning with "From "?
Definition: content.h:44
long nulbin
Null characters (0x0)
Definition: content.h:38
long linemax
Length of the longest line in the file.
Definition: content.h:41
long lobin
Unprintable 7-bit chars (eg., control chars)
Definition: content.h:37
bool dot
Has a line consisting of a single dot?
Definition: content.h:45
+ Here is the caller graph for this function:

◆ mutt_get_content_info()

struct Content * mutt_get_content_info ( const char *  fname,
struct Body *  b,
struct ConfigSubset *  sub 
)

Analyze file to determine MIME encoding to use.

Parameters
fname: File to examine
b: Body to update
sub: Config Subset
Return values
ptr: Newly allocated Content

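For a text body with neither noconv nor force_charset set, this also updates the body's charset: if the charset conversion succeeds, the source charset is stored in b->charset (and the "charset" parameter is set when the body had none); otherwise the "charset" parameter is set to "us-ascii", the value of $charset, or "unknown-8bit".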

Definition at line 188 of file content_info.c.

/* Body of mutt_get_content_info().
 *
 * Opens fname (or b->filename when fname is NULL), gathers statistics about
 * its contents into a newly allocated struct Content and returns it.
 * Returns NULL when no filename is available, stat() fails, the path is not a
 * regular file, or the file cannot be opened.
 * For a text body with neither noconv nor force_charset set, the body's
 * "charset" parameter (and possibly b->charset) is updated as a side effect. */
190{
191 struct Content *info = NULL;
192 struct ContentState cstate = { 0 };
193 FILE *fp = NULL;
194 char *fromcode = NULL;
195 char *tocode = NULL;
196 char buf[100] = { 0 };
197 size_t r;
198
199 struct stat st = { 0 };
200
/* Fall back to the body's own filename */
201 if (b && !fname)
202 fname = b->filename;
203 if (!fname)
204 return NULL;
205
206 if (stat(fname, &st) == -1)
207 {
208 mutt_error(_("Can't stat %s: %s"), fname, strerror(errno));
209 return NULL;
210 }
211
212 if (!S_ISREG(st.st_mode))
213 {
214 mutt_error(_("%s isn't a regular file"), fname);
215 return NULL;
216 }
217
218 fp = mutt_file_fopen(fname, "r");
219 if (!fp)
220 {
221 mutt_debug(LL_DEBUG1, "%s: %s (errno %d)\n", fname, strerror(errno), errno);
222 return NULL;
223 }
224
/* NOTE(review): presumably zero-filled (calloc-style) -- confirm in memory.h */
225 info = MUTT_MEM_CALLOC(1, struct Content);
226
227 const char *const c_charset = cc_charset();
/* First attempt: charset conversion, only for text bodies that allow it */
228 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
229 {
230 const struct Slist *const c_attach_charset = cs_subset_slist(sub, "attach_charset");
231 const struct Slist *const c_send_charset = cs_subset_slist(sub, "send_charset");
232 struct Slist *c_charset_slist = slist_parse(c_charset, D_SLIST_SEP_COLON);
233
/* Source charset candidates: attach_charset when use_disp is set and the
 * option has a value, otherwise the list parsed from $charset */
234 const struct Slist *fchs = b->use_disp ?
235 (c_attach_charset ? c_attach_charset : c_charset_slist) :
236 c_charset_slist;
237
/* Target charset candidates: the body's existing "charset" parameter, if any;
 * send_charset is used below when there is none */
238 struct Slist *chs = slist_parse(mutt_param_get(&b->parameter, "charset"), D_SLIST_SEP_COLON);
239
/* NOTE(review): info is passed to the converter, which presumably fills it
 * in on this path -- confirm against mutt_convert_file_from_to() */
240 if (c_charset && (chs || c_send_charset) &&
241 (mutt_convert_file_from_to(fp, fchs, chs ? chs : c_send_charset, &fromcode,
242 &tocode, info) != ICONV_ILLEGAL_SEQ))
243 {
/* The body had no charset parameter: record the one that was chosen */
244 if (!chs)
245 {
246 char chsbuf[256] = { 0 };
247 mutt_ch_canonical_charset(chsbuf, sizeof(chsbuf), tocode);
248 mutt_param_set(&b->parameter, "charset", chsbuf);
249 }
250 FREE(&b->charset);
251 b->charset = mutt_str_dup(fromcode);
/* NOTE(review): tocode is freed but fromcode is not -- confirm who owns
 * fromcode after mutt_convert_file_from_to() */
252 FREE(&tocode);
253 mutt_file_fclose(&fp);
254 slist_free(&c_charset_slist);
255 slist_free(&chs);
256 return info;
257 }
258
259 slist_free(&c_charset_slist);
260 slist_free(&chs);
261 }
262
/* Fallback: scan the file from the start in sizeof(buf)-byte chunks */
263 rewind(fp);
264 while ((r = fread(buf, 1, sizeof(buf), fp)))
265 mutt_update_content_info(info, &cstate, buf, r);
/* A null buffer tells mutt_update_content_info() that the input has ended */
266 mutt_update_content_info(info, &cstate, 0, 0);
267
268 mutt_file_fclose(&fp);
269
270 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
271 {
/* No 8-bit characters: "us-ascii"; otherwise $charset when it is set and is
 * not us-ascii; otherwise "unknown-8bit" */
272 mutt_param_set(&b->parameter, "charset",
273 (!info->hibin ? "us-ascii" :
274 c_charset && !mutt_ch_is_us_ascii(c_charset) ? c_charset :
275 "unknown-8bit"));
276 }
277
278 return info;
279}
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:242
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:116
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
Definition: content_info.c:49
size_t mutt_convert_file_from_to(FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
Convert a file between encodings.
Definition: convert.c:215
#define mutt_file_fclose(FP)
Definition: file.h:138
#define mutt_file_fopen(PATH, MODE)
Definition: file.h:137
#define mutt_error(...)
Definition: logging2.h:92
#define mutt_debug(LEVEL,...)
Definition: logging2.h:89
@ LL_DEBUG1
Log at debug level 1.
Definition: logging2.h:43
#define FREE(x)
Definition: memory.h:55
#define MUTT_MEM_CALLOC(n, type)
Definition: memory.h:40
@ TYPE_TEXT
Type: 'text/*'.
Definition: mime.h:38
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition: charset.c:374
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:90
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:96
#define _(a)
Definition: message.h:28
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:175
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:122
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition: string.c:253
char * mutt_param_get(const struct ParameterList *pl, const char *s)
Find a matching Parameter.
Definition: parameter.c:85
void mutt_param_set(struct ParameterList *pl, const char *attribute, const char *value)
Set a Parameter.
Definition: parameter.c:111
bool noconv
Don't do character set conversion.
Definition: body.h:46
char * charset
Send mode: charset of attached file as stored on disk.
Definition: body.h:79
struct ParameterList parameter
Parameters of the content-type.
Definition: body.h:63
bool use_disp
Content-Disposition uses filename= ?
Definition: body.h:47
bool force_charset
Send mode: don't adjust the character set when in send-mode.
Definition: body.h:44
unsigned int type
content-type primary type, ContentType
Definition: body.h:40
char * filename
When sending a message, this is the file to which this structure refers.
Definition: body.h:59
Info about the body of an email.
Definition: content.h:56
Info about an attachment.
Definition: content.h:35
String list.
Definition: slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition: types.h:112
+ Here is the call graph for this function:
+ Here is the caller graph for this function: