NeoMutt  2024-12-12-19-ge4b57e
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
mbyte.c
Go to the documentation of this file.
1
30#include "config.h"
31#include <ctype.h>
32#include <limits.h>
33#include <stdbool.h>
34#include <string.h>
35#include <wchar.h>
36#include <wctype.h>
37#include "mbyte.h"
38#include "buffer.h"
39#include "charset.h"
40#include "memory.h"
41#include "pool.h"
42#include "string2.h"
43
45
55int mutt_mb_charlen(const char *s, int *width)
56{
57 if (!s || (*s == '\0'))
58 return 0;
59
60 wchar_t wc = 0;
61 mbstate_t mbstate = { 0 };
62
63 size_t n = mutt_str_len(s);
64 size_t k = mbrtowc(&wc, s, n, &mbstate);
65 if (width)
66 *width = wcwidth(wc);
67 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
68}
69
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer
 * @retval true  Success; buf holds the NUL-terminated initials
 * @retval false NULL argument, zero-sized buffer, undecodable character,
 *               or the initials (plus terminator) do not fit in buf
 *
 * The first (multibyte) character of every word is copied to buf.
 * Words are separated by whitespace or hyphens; single-byte punctuation at
 * the start of a word is skipped.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer cannot even hold the terminator */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * <ctype.h> functions need a value representable as unsigned char. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Leave room for the terminating NUL (clen is known to be >= 1 here) */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= (size_t) clen;
    for (int i = 0; i < clen; i++)
      *buf++ = *name++;

    /* Skip to end-of-word */
    while (*name)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
      name += clen;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
127
137int mutt_mb_width(const char *str, int col, bool indent)
138{
139 if (!str || !*str)
140 return 0;
141
142 bool nl = false;
143 int total_width = 0;
144 mbstate_t mbstate = { 0 };
145
146 size_t str_len = mutt_str_len(str);
147
148 while (*str && (str_len > 0))
149 {
150 wchar_t wc = L'\0';
151 size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
152 if (consumed == 0)
153 break;
154
155 if (consumed == ICONV_ILLEGAL_SEQ)
156 {
157 memset(&mbstate, 0, sizeof(mbstate));
158 wc = ReplacementChar;
159 consumed = 1;
160 }
161 else if (consumed == ICONV_BUF_TOO_SMALL)
162 {
163 wc = ReplacementChar;
164 consumed = str_len;
165 }
166
167 int wchar_width = wcwidth(wc);
168 if (wchar_width < 0)
169 wchar_width = 1;
170
171 if ((wc == L'\t') || (nl && (wc == L' ')))
172 {
173 /* correctly calc tab stop, even for sending as the line should look
174 * pretty on the receiving end */
175 nl = false;
176 wchar_width = 8 - (col % 8);
177 }
178 else if (indent && (wc == '\n'))
179 {
180 /* track newlines for display-case: if we have a space after a newline,
181 * assume 8 spaces as for display we always tab-fold */
182 nl = true;
183 }
184
185 total_width += wchar_width;
186 str += consumed;
187 str_len -= consumed;
188 }
189
190 return total_width;
191}
192
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num wcwidth() of wc, if IsWPrint(wc) holds and that width is positive
 * @retval 2   Otherwise, if wc has no bits set above 0x7f
 * @retval 6   Otherwise, if wc has no bits set above 0xffff
 * @retval 10  Otherwise
 *
 * NOTE(review): the fixed values 2/6/10 look like the widths of an escaped
 * rendering of the character -- confirm against the caller that draws them.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);

  if (IsWPrint(wc) && (cols > 0))
  {
    return cols;
  }
  else if ((wc & ~0x7f) == 0)
  {
    return 2;
  }
  else if ((wc & ~0xffff) == 0)
  {
    return 6;
  }

  return 10;
}
209
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters
 * @retval 0   s is NULL
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t idx = 0; idx < n; idx++)
    total += mutt_mb_wcwidth(s[idx]);

  return total;
}
226
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Number of wide characters in s
 * @param w1 Width limit, in screen columns
 * @retval num Count of leading characters whose accumulated
 *             mutt_mb_wcwidth() does not exceed w1 (at most n)
 * @retval 0   s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break; /* this character would cross the limit */
    count++;
  }

  return count;
}
249
256void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
257{
258 if (!dest || !wstr)
259 return;
260
261 // Give ourselves 4 utf-8 bytes per wide character
262 buf_alloc(dest, 4 * wlen);
263
264 mbstate_t mbstate = { 0 };
265 size_t k = 0;
266
267 char *buf = dest->data;
268 size_t buflen = dest->dsize;
269
270 for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
271 {
272 k = wcrtomb(buf, *wstr, &mbstate);
273 if (k == ICONV_ILLEGAL_SEQ)
274 break;
275 if (*wstr == L'\0')
276 break;
277 }
278
279 *buf = '\0';
280 buf_fix_dptr(dest);
281}
282
291size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
292{
293 if (!pwbuf || !pwbuflen || !buf)
294 return 0;
295
296 wchar_t wc = 0;
297 mbstate_t mbstate = { 0 };
298 size_t k;
299 wchar_t *wbuf = *pwbuf;
300 size_t wbuflen = *pwbuflen;
301
302 while (*buf != '\0')
303 {
304 memset(&mbstate, 0, sizeof(mbstate));
305 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
306 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
307 buf += k)
308 {
309 if (i >= wbuflen)
310 {
311 wbuflen = i + 20;
312 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
313 }
314 wbuf[i++] = wc;
315 }
316 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
317 {
318 if (i >= wbuflen)
319 {
320 wbuflen = i + 20;
321 MUTT_MEM_REALLOC(&wbuf, wbuflen, wchar_t);
322 }
323 wbuf[i++] = ReplacementChar;
324 buf++;
325 }
326 }
327 *pwbuf = wbuf;
328 *pwbuflen = wbuflen;
329 return i;
330}
331
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of `<>&()$?*;{}|` or a space
 * @retval false Character is anything else
 *
 * @note Because the lookup uses wcschr(), L'\0' matches the table's
 *       terminator and is also reported as true.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* ! not included because it can be part of a pathname in NeoMutt */
  static const wchar_t special[] = L"<>&()$?*;{}| ";

  return wcschr(special, ch) != NULL;
}
345
354bool mutt_mb_is_lower(const char *s)
355{
356 if (!s)
357 return false;
358
359 wchar_t wc = 0;
360 mbstate_t mbstate = { 0 };
361 size_t l;
362
363 memset(&mbstate, 0, sizeof(mbstate));
364 size_t n = mutt_str_len(s);
365
366 for (; (n > 0) && (*s != '\0') && (l = mbrtowc(&wc, s, n, &mbstate)) != 0; s += l, n -= l)
367 {
368 if ((l == ICONV_BUF_TOO_SMALL) || (l == ICONV_ILLEGAL_SEQ))
369 return false; // error; assume upper-case
370 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
371 return false; // upper-case
372 }
373
374 return true; // lower-case
375}
376
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is one of the listed invisible / bidirectional
 *               control code points
 * @retval false Character is anything else
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  /* arabic letter mark */
  if (wc == (wchar_t) 0x061c)
    return true;

  return false;
}
411
424{
425 if (!s || !*s)
426 return -1;
427
428 wchar_t wc = 0;
429 size_t k, k2;
430 char scratch[MB_LEN_MAX + 1];
431 char *p = *s;
432 mbstate_t mbstate1 = { 0 };
433 mbstate_t mbstate2 = { 0 };
434
435 struct Buffer *buf = buf_pool_get();
436 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
437 {
438 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
439 {
440 k = 1;
441 memset(&mbstate1, 0, sizeof(mbstate1));
442 wc = ReplacementChar;
443 }
444 if (CharsetIsUtf8 && IsBOM(wc))
445 {
446 continue;
447 }
448 if (!IsWPrint(wc))
449 wc = '?';
451 continue;
452 k2 = wcrtomb(scratch, wc, &mbstate2);
453 scratch[k2] = '\0';
454 buf_addstr(buf, scratch);
455 }
456 FREE(s);
457
458 if (buf_is_empty(buf))
459 *s = MUTT_MEM_CALLOC(1, char); // Fake empty string
460 else
461 *s = buf_strdup(buf);
462
463 buf_pool_release(&buf);
464 return 0;
465}
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:291
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition: buffer.c:182
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:226
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition: buffer.c:571
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition: buffer.c:337
General purpose object for storing and parsing strings.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:55
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:340
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:237
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:44
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:82
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:385
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:291
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:216
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:354
int mutt_mb_width(const char *str, int col, bool indent)
Measure a string's display width (in screen columns)
Definition: mbyte.c:137
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:256
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:198
Multi-byte String manipulation functions.
#define IsBOM(wc)
Definition: mbyte.h:43
#define IsWPrint(wc)
Definition: mbyte.h:41
Memory management wrappers.
#define FREE(x)
Definition: memory.h:55
#define MUTT_MEM_CALLOC(n, type)
Definition: memory.h:40
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition: memory.h:43
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:66
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:61
Conversion between different character encodings.
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition: charset.h:98
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:96
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:496
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:82
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition: pool.c:96
A global pool of Buffers.
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:36
size_t dsize
Length of data.
Definition: buffer.h:39
char * data
Pointer to data.
Definition: buffer.h:37