NeoMutt  2022-04-29-247-gc6aae8
Teaching an old dog new tricks
DOXYGEN
mbyte.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <ctype.h>
31#include <limits.h>
32#include <stdbool.h>
33#include <stdlib.h>
34#include <string.h>
35#include <wchar.h>
36#include <wctype.h>
37#include "mbyte.h"
38#include "buffer.h"
39#include "charset.h"
40#include "memory.h"
41#include "string2.h"
42
44
/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to be examined
 * @param[out] width If not NULL, receives wcwidth() of the decoded character
 * @retval  0 `s` is NULL or an empty string
 * @retval -1 mbrtowc() reported an invalid or incomplete sequence
 * @retval >0 Number of bytes that make up the first character of `s`
 *
 * @note `width` is written whenever it is non-NULL and `s` is non-empty, even
 *       when the conversion fails; in that case the wide character passed to
 *       wcwidth() is still the initial value 0.
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (*s == '\0'))
    return 0;

  mbstate_t state = { 0 };
  wchar_t first = 0;

  /* Let mbrtowc() look at the whole remaining string */
  const size_t avail = mutt_str_len(s);
  const size_t used = mbrtowc(&first, s, avail, &state);

  if (width)
    *width = wcwidth(first);

  if ((used == (size_t) (-1)) || (used == (size_t) (-2)))
    return -1;

  return used;
}
69
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, `buf` holds a NUL-terminated string of initials
 * @retval false `name` or `buf` is NULL, `buflen` is 0, a character could not
 *               be decoded, or the initials do not fit in `buf`
 *
 * The first (multibyte) character of every word is copied to `buf`.  Words are
 * separated by whitespace or hyphens; single-byte punctuation in front of a
 * word is skipped.
 *
 * @note On failure `buf` may hold a partial result that is not NUL-terminated.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer cannot even hold the terminator */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * <ctype.h> functions need a value representable as unsigned char. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always keep one byte in reserve for the terminator */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    memcpy(buf, name, (size_t) clen);
    buf += clen;
    name += clen;
    buflen -= (size_t) clen;

    /* Skip to end-of-word */
    while (*name)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
      name += clen;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
127
/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure
 * @param col     Column used as the reference point for tab stops
 * @param display If true, a space directly following a newline is measured
 *                like a tab
 * @retval num Width of the string in screen columns, 0 if `str` is NULL
 *
 * Each character is decoded with mbrtowc() and measured with wcwidth().
 * Characters for which wcwidth() reports a negative width, and bytes that
 * cannot be decoded, count as one column each.
 *
 * @note Tab stops are always computed from `col`; the running width is not
 *       added to it.
 */
int mutt_mb_width(const char *str, int col, bool display)
{
  if (!str)
    return 0;

  mbstate_t mbstate = { 0 };
  const char *p = str;
  size_t remaining = strlen(str);
  bool nl = false;
  int w = 0;

  while (*p)
  {
    wchar_t wc = 0;
    int l = 1;
    size_t k = mbrtowc(&wc, p, remaining, &mbstate);

    if (k == 0)
      break; /* decoded a terminator */

    if ((k == (size_t) (-1)) || (k == (size_t) (-2)))
    {
      /* Undecodable byte: count one column, resynchronise on the next byte */
      memset(&mbstate, 0, sizeof(mbstate));
      k = 1;
    }
    else
    {
      l = wcwidth(wc);
      if (l < 0)
        l = 1;

      /* correctly calc tab stop, even for sending as the
       * line should look pretty on the receiving end */
      if ((wc == L'\t') || (nl && (wc == L' ')))
      {
        nl = false;
        l = 8 - (col % 8);
      }
      /* track newlines for display-case: if we have a space
       * after a newline, assume 8 spaces as for display we
       * always tab-fold */
      else if (display && (wc == L'\n'))
      {
        nl = true;
      }
    }

    w += l;
    /* Step over the whole character, not just its first byte, so that
     * continuation bytes are not measured a second time */
    p += k;
    remaining -= k;
  }

  return w;
}
170
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A character that passes IsWPrint() and has a positive wcwidth() is reported
 * with that width.  Anything else gets a fixed width that depends on its
 * value: 2 for values up to 0x7f, 6 for values up to 0xffff, 10 otherwise.
 *
 * NOTE(review): the fixed widths presumably match the length of an escaped
 * representation drawn by the caller - confirm.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);
  if (IsWPrint(wc) && (cols > 0))
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}
187
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first `n` characters,
 *             0 if `s` is NULL
 *
 * @note Exactly `n` characters are read; a terminator inside that range does
 *       not stop the scan.
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t idx = 0; idx < n; idx++)
    total += mutt_mb_wcwidth(s[idx]);

  return total;
}
204
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit, in screen columns
 * @retval num Index of the first character whose width pushes the running
 *             total above `w1`; `n` if the limit is never exceeded;
 *             0 if `s` is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t idx = 0;
  int used = 0;

  while (idx < n)
  {
    used += mutt_mb_wcwidth(s[idx]);
    if (used > w1)
      break;
    idx++;
  }

  return idx;
}
227
/**
 * mutt_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param dlen Length of the result buffer, in bytes
 * @param src  Source string to convert
 * @param slen Number of wide characters to convert
 *
 * Nothing is written if `dest` or `src` is NULL, or if `dlen` is 0.
 *
 * Characters are converted straight into `dest` for as long as at least
 * MB_LEN_MAX bytes are free.  Once less space is left, the remainder is
 * converted into a local buffer and copied over only as far as it fits; a
 * result that does not fit is cut off at the byte level and NUL-terminated.
 * Conversion stops early at the first character wcrtomb() cannot convert.
 */
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
{
  /* With no room at all, the truncation path would write to dest[-1] */
  if (!dest || !src || (dlen == 0))
    return;

  mbstate_t mbstate = { 0 };
  size_t k;

  /* First convert directly into the destination buffer */
  for (; slen && (dlen >= MB_LEN_MAX); dest += k, dlen -= k, src++, slen--)
  {
    k = wcrtomb(dest, *src, &mbstate);
    if (k == (size_t) (-1))
      break;
  }

  /* If this works, we can stop now */
  if (dlen >= MB_LEN_MAX)
  {
    wcrtomb(dest, 0, &mbstate);
    return;
  }

  /* Otherwise convert any remaining data into a local buffer.
   * Fewer than MB_LEN_MAX bytes are in use when the last character is
   * converted, and the terminator adds at most MB_LEN_MAX more. */
  char buf[3 * MB_LEN_MAX];
  char *p = buf;

  for (; slen && ((size_t) (p - buf) < dlen); p += k, src++, slen--)
  {
    k = wcrtomb(p, *src, &mbstate);
    if (k == (size_t) (-1))
      break;
  }
  p += wcrtomb(p, 0, &mbstate);

  const size_t used = (size_t) (p - buf);

  /* If it fits into the destination buffer, we can stop now */
  if (used <= dlen)
  {
    memcpy(dest, buf, used);
    return;
  }

  /* Otherwise we truncate the string in an ugly fashion */
  memcpy(dest, buf, dlen);
  dest[dlen - 1] = '\0';
}
283
292size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
293{
294 if (!pwbuf || !pwbuflen || !buf)
295 return 0;
296
297 wchar_t wc = 0;
298 mbstate_t mbstate = { 0 };
299 size_t k;
300 wchar_t *wbuf = *pwbuf;
301 size_t wbuflen = *pwbuflen;
302
303 while (*buf != '\0')
304 {
305 memset(&mbstate, 0, sizeof(mbstate));
306 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
307 k != (size_t) (-1) && k != (size_t) (-2);
308 buf += k)
309 {
310 if (i >= wbuflen)
311 {
312 wbuflen = i + 20;
313 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
314 }
315 wbuf[i++] = wc;
316 }
317 if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
318 {
319 if (i >= wbuflen)
320 {
321 wbuflen = i + 20;
322 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
323 }
324 wbuf[i++] = ReplacementChar;
325 buf++;
326 }
327 }
328 *pwbuf = wbuf;
329 *pwbuflen = wbuflen;
330 return i;
331}
332
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of `<>&()$?*;{}|` or a space
 * @retval false Any other character, including the null wide character
 *
 * @note The character `!` is not included because it can be part of a
 *       pathname in NeoMutt.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| ";

  /* wcschr() treats the terminator as part of the string, so a null
   * character has to be rejected explicitly */
  if (ch == L'\0')
    return false;

  return wcschr(shell_chars, ch) != NULL;
}
346
/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  No character of the string is an uppercase letter
 * @retval false `s` is NULL, contains an uppercase letter, or contains a
 *               sequence that mbrtowc() rejects as invalid
 *
 * Non-alphabetic characters are ignored, so an empty string yields true.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  wchar_t wc = 0;
  mbstate_t mbstate = { 0 };
  const size_t n = MB_CUR_MAX;

  while (true)
  {
    const size_t l = mbrtowc(&wc, s, n, &mbstate);
    if (l == 0)
      break;

    if (l == (size_t) (-1))
      return false;

    if (l == (size_t) (-2))
    {
      /* All n bytes were absorbed into the conversion state without
       * completing a character (e.g. shift sequences): step over exactly
       * those bytes and keep decoding with the same state */
      s += n;
      continue;
    }

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false;

    s += l;
  }

  return true;
}
378
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is a soft hyphen or one of the listed directional /
 *               zero-width formatting characters
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
409
422{
423 if (!s || !*s)
424 return -1;
425
426 wchar_t wc = 0;
427 size_t k, k2;
428 char scratch[MB_LEN_MAX + 1];
429 char *p = *s;
430 mbstate_t mbstate1 = { 0 };
431 mbstate_t mbstate2 = { 0 };
432
433 struct Buffer buf = mutt_buffer_make(0);
434 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
435 {
436 if ((k == (size_t) -1) || (k == (size_t) -2))
437 {
438 k = 1;
439 memset(&mbstate1, 0, sizeof(mbstate1));
440 wc = ReplacementChar;
441 }
442 if (!IsWPrint(wc))
443 wc = '?';
445 continue;
446 k2 = wcrtomb(scratch, wc, &mbstate2);
447 scratch[k2] = '\0';
448 mutt_buffer_addstr(&buf, scratch);
449 }
450 FREE(s);
451 *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
452 return 0;
453}
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:67
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:233
General purpose object for storing and parsing strings.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:235
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:341
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:421
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:215
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:43
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:82
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:387
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:292
int mutt_mb_width(const char *str, int col, bool display)
Measure a string's display width (in screen columns)
Definition: mbyte.c:137
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:194
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:355
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:176
Multi-byte String manipulation functions.
#define IsWPrint(wc)
Definition: mbyte.h:39
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:62
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:57
Conversion between different character encodings.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:567
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
char * data
Pointer to data.
Definition: buffer.h:35