NeoMutt  2023-03-22-27-g3cb248
Teaching an old dog new tricks
DOXYGEN
mbyte.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <ctype.h>
31#include <limits.h>
32#include <stdbool.h>
33#include <stdlib.h>
34#include <string.h>
35#include <wchar.h>
36#include <wctype.h>
37#include "mbyte.h"
38#include "buffer.h"
39#include "charset.h"
40#include "memory.h"
41#include "string2.h"
42
44
/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to examine
 * @param[out] width If non-NULL, receives wcwidth() of the decoded character
 * @retval >0 Number of bytes making up the first character of s
 * @retval  0 s is NULL or empty
 * @retval -1 The bytes at s are an invalid or incomplete multibyte sequence
 *
 * When decoding fails nothing is stored in the wide character, so *width is
 * computed for the initial value L'\0'.
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (s[0] == '\0'))
    return 0;

  wchar_t first = 0;
  mbstate_t state = { 0 };

  const size_t avail = mutt_str_len(s);
  const size_t used = mbrtowc(&first, s, avail, &state);

  if (width)
    *width = wcwidth(first);

  if ((used == (size_t) (-1)) || (used == (size_t) (-2)))
    return -1;

  return used;
}
69
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false name or buf is NULL, buflen is 0, a character could not be
 *               decoded, or the initials do not fit into buf
 *
 * The first (multibyte) character of every word is copied to buf.  Words are
 * separated by whitespace or hyphens; punctuation in front of a word is
 * skipped.
 *
 * @note On failure buf may contain partial output that is not NUL-terminated.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* Without room for at least the terminator nothing can be stored safely */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * The <ctype.h> classifiers need a value representable as unsigned char;
     * a single-byte character above 0x7f would otherwise be passed negative. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Keep one byte in reserve for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word (clen is reassigned before the increment uses it) */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
127
/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure (NULL is treated as empty)
 * @param col     Column used as the reference for tab stops
 * @param display If true, a space following a newline is measured like a tab
 * @retval num Width of the string in screen columns
 *
 * Every decoded character contributes its wcwidth(); characters for which
 * wcwidth() is negative, and bytes that cannot be decoded, count as one
 * column.  A tab counts as the distance from col to the next multiple of 8.
 *
 * The string is walked one whole multibyte character at a time, so the
 * trailing bytes of a multibyte character are not counted again.
 *
 * NOTE(review): col is not advanced while the string is consumed, so every
 * tab is measured from the starting column - confirm this is intended.
 */
int mutt_mb_width(const char *str, int col, bool display)
{
  if (!str)
    return 0;

  wchar_t wc = 0;
  int w = 0;
  bool nl = false;
  const char *p = str;

  while (*p)
  {
    int l = 1;
    const int consumed = mbtowc(&wc, p, MB_CUR_MAX);
    if (consumed >= 0)
    {
      l = wcwidth(wc);
      if (l < 0)
        l = 1;
      /* correctly calc tab stop, even for sending as the
       * line should look pretty on the receiving end */
      if ((wc == L'\t') || (nl && (wc == L' ')))
      {
        nl = false;
        l = 8 - (col % 8);
      }
      /* track newlines for display-case: if we have a space
       * after a newline, assume 8 spaces as for display we
       * always tab-fold */
      else if (display && (wc == L'\n'))
      {
        nl = true;
      }
    }
    else
    {
      /* Undecodable byte: count one column and put mbtowc() back into its
       * initial shift state so the next byte is decoded on its own */
      (void) mbtowc(NULL, NULL, 0);
    }

    w += l;
    /* Step over the whole character; a failed decode skips a single byte */
    p += (consumed > 0) ? consumed : 1;
  }
  return w;
}
172
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A character that passes IsWPrint() and has a positive wcwidth() reports
 * that width.  Anything else reports a fixed size chosen by magnitude:
 * 2 if the value fits in 7 bits, 6 if it fits in 16 bits, otherwise 10.
 *
 * NOTE(review): the fixed sizes presumably match the length of an escaped
 * representation drawn by the caller - confirm against the display code.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);

  if (IsWPrint(wc) && (cols > 0))
    return cols;

  int fallback;
  if ((wc & ~0x7f) == 0)
    fallback = 2;
  else if ((wc & ~0xffff) == 0)
    fallback = 6;
  else
    fallback = 10;

  return fallback;
}
189
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters
 * @retval 0   s is NULL
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  int total = 0;

  if (s)
  {
    for (size_t idx = 0; idx < n; idx++)
      total += mutt_mb_wcwidth(s[idx]);
  }

  return total;
}
206
/**
 * mutt_mb_width_ceiling - Count the leading characters that fit a width
 * @param s  String to examine
 * @param n  Number of wide characters available in s
 * @param w1 Width limit, in screen columns
 * @retval num Number of leading characters whose combined
 *             mutt_mb_wcwidth() does not exceed w1 (at most n)
 * @retval 0   s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break; /* this character would cross the limit */
    count++;
  }

  return count;
}
229
/**
 * mutt_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param dlen Size of the result buffer, in bytes
 * @param src  Wide characters to convert
 * @param slen Number of wide characters in src
 *
 * The result is always NUL-terminated.  Conversion stops at the first wide
 * character that cannot be encoded.  If the output does not fit, it is cut at
 * a byte boundary, which may split the last multibyte character.
 *
 * Nothing is written when dest or src is NULL, or when dlen is 0.
 */
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
{
  /* An empty buffer has no room for even the terminator */
  if (!dest || !src || (dlen == 0))
    return;

  mbstate_t mbstate = { 0 };
  size_t k;

  /* First convert directly into the destination buffer.
   * This is safe while a full MB_LEN_MAX bytes are still available. */
  for (; slen && (dlen >= MB_LEN_MAX); dest += k, dlen -= k, src++, slen--)
  {
    k = wcrtomb(dest, *src, &mbstate);
    if (k == (size_t) (-1))
    {
      /* The state is unspecified after an encoding error; start afresh so
       * the terminator below is produced from a well-defined state */
      memset(&mbstate, 0, sizeof(mbstate));
      break;
    }
  }

  /* If this works, we can stop now */
  if (dlen >= MB_LEN_MAX)
  {
    dest += wcrtomb(dest, 0, &mbstate);
    return;
  }

  /* The direct pass used up every byte: end the string on the last byte
   * written (dest was advanced, so dest - 1 lies inside the buffer) */
  if (dlen == 0)
  {
    *(dest - 1) = '\0';
    return;
  }

  /* Otherwise convert any remaining data into a local buffer.
   * It has room for just under MB_LEN_MAX bytes already accepted, plus one
   * more character, plus the terminating sequence. */
  char buf[3 * MB_LEN_MAX];
  char *p = buf;

  for (; slen && ((size_t) (p - buf) < dlen); p += k, src++, slen--)
  {
    k = wcrtomb(p, *src, &mbstate);
    if (k == (size_t) (-1))
    {
      memset(&mbstate, 0, sizeof(mbstate));
      break;
    }
  }
  p += wcrtomb(p, 0, &mbstate);

  /* If it fits into the destination buffer, we can stop now */
  const size_t produced = (size_t) (p - buf);
  if (produced <= dlen)
  {
    memcpy(dest, buf, produced);
    return;
  }

  /* Otherwise we truncate the string in an ugly fashion */
  memcpy(dest, buf, dlen);
  dest[dlen - 1] = '\0';
}
285
294size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
295{
296 if (!pwbuf || !pwbuflen || !buf)
297 return 0;
298
299 wchar_t wc = 0;
300 mbstate_t mbstate = { 0 };
301 size_t k;
302 wchar_t *wbuf = *pwbuf;
303 size_t wbuflen = *pwbuflen;
304
305 while (*buf != '\0')
306 {
307 memset(&mbstate, 0, sizeof(mbstate));
308 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
309 k != (size_t) (-1) && k != (size_t) (-2);
310 buf += k)
311 {
312 if (i >= wbuflen)
313 {
314 wbuflen = i + 20;
315 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
316 }
317 wbuf[i++] = wc;
318 }
319 if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
320 {
321 if (i >= wbuflen)
322 {
323 wbuflen = i + 20;
324 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
325 }
326 wbuf[i++] = ReplacementChar;
327 buf++;
328 }
329 }
330 *pwbuf = wbuf;
331 *pwbuflen = wbuflen;
332 return i;
333}
334
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of "<>&()$?*;{}| "
 * @retval false Character is anything else
 *
 * @note L'\0' also yields true, because wcschr() matches the terminator of
 *       the lookup string.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* '!' is left out on purpose: it can be part of a pathname in NeoMutt */
  static const wchar_t metachars[] = L"<>&()$?*;{}| ";

  return wcschr(metachars, ch) != NULL;
}
348
/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  No uppercase letter was found (non-alphabetic characters are
 *               accepted, as is the empty string)
 * @retval false s is NULL, contains an uppercase letter, or contains an
 *               invalid multibyte sequence
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  wchar_t wc = 0;
  mbstate_t mbstate = { 0 };
  size_t l;

  while ((l = mbrtowc(&wc, s, MB_CUR_MAX, &mbstate)) != 0)
  {
    if (l == (size_t) -1)
      return false;

    if (l == (size_t) -2)
    {
      /* All MB_CUR_MAX bytes were absorbed into the conversion state without
       * completing a character (e.g. shift sequences).  Step over exactly
       * those bytes; adding the (size_t) -2 marker itself would move the
       * pointer backwards. */
      s += MB_CUR_MAX;
      continue;
    }

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false;

    s += l;
  }

  return true;
}
380
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is a soft hyphen, a directional mark, a
 *               zero width no-break space, or a bidirectional
 *               isolate/embedding/override control
 * @retval false Character is anything else
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
411
424{
425 if (!s || !*s)
426 return -1;
427
428 wchar_t wc = 0;
429 size_t k, k2;
430 char scratch[MB_LEN_MAX + 1];
431 char *p = *s;
432 mbstate_t mbstate1 = { 0 };
433 mbstate_t mbstate2 = { 0 };
434
435 struct Buffer buf = mutt_buffer_make(0);
436 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
437 {
438 if ((k == (size_t) -1) || (k == (size_t) -2))
439 {
440 k = 1;
441 memset(&mbstate1, 0, sizeof(mbstate1));
442 wc = ReplacementChar;
443 }
444 if (!IsWPrint(wc))
445 wc = '?';
447 continue;
448 k2 = wcrtomb(scratch, wc, &mbstate2);
449 scratch[k2] = '\0';
450 mutt_buffer_addstr(&buf, scratch);
451 }
452 FREE(s);
453 *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
454 return 0;
455}
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:67
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:233
General purpose object for storing and parsing strings.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:237
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:343
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:217
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:43
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:82
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:389
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:294
int mutt_mb_width(const char *str, int col, bool display)
Measure a string's display width (in screen columns)
Definition: mbyte.c:137
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:196
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:357
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:178
Multi-byte String manipulation functions.
#define IsWPrint(wc)
Definition: mbyte.h:39
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:43
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:60
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:55
Conversion between different character encodings.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:567
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
char * data
Pointer to data.
Definition: buffer.h:35