NeoMutt  2023-11-03-85-g512e01
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
mbyte.c
Go to the documentation of this file.
1
29#include "config.h"
30#include <ctype.h>
31#include <limits.h>
32#include <stdbool.h>
33#include <stdlib.h>
34#include <string.h>
35#include <wchar.h>
36#include <wctype.h>
37#include "mbyte.h"
38#include "buffer.h"
39#include "charset.h"
40#include "memory.h"
41#include "string2.h"
42
44
54int mutt_mb_charlen(const char *s, int *width)
55{
56 if (!s || (*s == '\0'))
57 return 0;
58
59 wchar_t wc = 0;
60 mbstate_t mbstate = { 0 };
61
62 size_t n = mutt_str_len(s);
63 size_t k = mbrtowc(&wc, s, n, &mbstate);
64 if (width)
65 *width = wcwidth(wc);
66 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
67}
68
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false name or buf is NULL, buflen is 0, a character could not be
 *               decoded, or the initials don't fit into buf
 *
 * The first (multibyte) character of every word is copied to buf.  Words are
 * separated by whitespace or hyphens; leading punctuation is skipped.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer can't even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * The <ctype.h> functions require a value representable as unsigned char */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always leave room for the terminating NUL (clen is >= 1 here) */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word; clen is refreshed before each step */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
126
136int mutt_mb_width(const char *str, int col, bool indent)
137{
138 if (!str || !*str)
139 return 0;
140
141 bool nl = false;
142 int total_width = 0;
143 mbstate_t mbstate = { 0 };
144
145 size_t str_len = mutt_str_len(str);
146
147 while (*str && (str_len > 0))
148 {
149 wchar_t wc = L'\0';
150 size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
151 if (consumed == 0)
152 break;
153
154 if (consumed == ICONV_ILLEGAL_SEQ)
155 {
156 memset(&mbstate, 0, sizeof(mbstate));
157 wc = ReplacementChar;
158 consumed = 1;
159 }
160 else if (consumed == ICONV_BUF_TOO_SMALL)
161 {
162 wc = ReplacementChar;
163 consumed = str_len;
164 }
165
166 int wchar_width = wcwidth(wc);
167 if (wchar_width < 0)
168 wchar_width = 1;
169
170 if ((wc == L'\t') || (nl && (wc == L' ')))
171 {
172 /* correctly calc tab stop, even for sending as the line should look
173 * pretty on the receiving end */
174 nl = false;
175 wchar_width = 8 - (col % 8);
176 }
177 else if (indent && (wc == '\n'))
178 {
179 /* track newlines for display-case: if we have a space after a newline,
180 * assume 8 spaces as for display we always tab-fold */
181 nl = true;
182 }
183
184 total_width += wchar_width;
185 str += consumed;
186 str_len -= consumed;
187 }
188
189 return total_width;
190}
191
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A printable character with a positive wcwidth() is reported as-is.
 * Anything else gets a fixed width chosen by its value:
 * 2 up to 0x7f, 6 up to 0xffff and 10 beyond that.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);
  if (IsWPrint(wc) && (cols > 0))
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}
208
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters,
 *             0 if s is NULL
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t i = 0; i < n; i++)
    total += mutt_mb_wcwidth(s[i]);

  return total;
}
225
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit
 * @retval num Number of leading characters whose combined width, as measured
 *             by mutt_mb_wcwidth(), does not exceed w1 (at most n);
 *             0 if s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break; /* this character would cross the limit */
    count++;
  }

  return count;
}
248
255void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
256{
257 if (!dest || !wstr)
258 return;
259
260 // Give ourselves 4 utf-8 bytes per wide character
261 buf_alloc(dest, 4 * wlen);
262
263 mbstate_t mbstate = { 0 };
264 size_t k = 0;
265
266 char *buf = dest->data;
267 size_t buflen = dest->dsize;
268
269 for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
270 {
271 k = wcrtomb(buf, *wstr, &mbstate);
272 if (k == ICONV_ILLEGAL_SEQ)
273 break;
274 if (*wstr == L'\0')
275 break;
276 }
277
278 *buf = '\0';
279 buf_fix_dptr(dest);
280}
281
290size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
291{
292 if (!pwbuf || !pwbuflen || !buf)
293 return 0;
294
295 wchar_t wc = 0;
296 mbstate_t mbstate = { 0 };
297 size_t k;
298 wchar_t *wbuf = *pwbuf;
299 size_t wbuflen = *pwbuflen;
300
301 while (*buf != '\0')
302 {
303 memset(&mbstate, 0, sizeof(mbstate));
304 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
305 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
306 buf += k)
307 {
308 if (i >= wbuflen)
309 {
310 wbuflen = i + 20;
311 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
312 }
313 wbuf[i++] = wc;
314 }
315 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
316 {
317 if (i >= wbuflen)
318 {
319 wbuflen = i + 20;
320 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
321 }
322 wbuf[i++] = ReplacementChar;
323 buf++;
324 }
325 }
326 *pwbuf = wbuf;
327 *pwbuflen = wbuflen;
328 return i;
329}
330
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of: < > & ( ) $ ? * ; { } | or space
 * @retval false Any other character, including L'\0'
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* ! not included because it can be part of a pathname in NeoMutt */
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| ";

  /* wcschr() treats the terminating L'\0' as part of the string, so it would
   * otherwise be reported as a shell character */
  if (ch == L'\0')
    return false;

  return wcschr(shell_chars, ch) != NULL;
}
344
353bool mutt_mb_is_lower(const char *s)
354{
355 if (!s)
356 return false;
357
358 wchar_t wc = 0;
359 mbstate_t mbstate = { 0 };
360 size_t l;
361
362 memset(&mbstate, 0, sizeof(mbstate));
363
364 for (; (l = mbrtowc(&wc, s, MB_CUR_MAX, &mbstate)) != 0; s += l)
365 {
366 if (l == ICONV_BUF_TOO_SMALL)
367 continue; /* shift sequences */
368 if (l == ICONV_ILLEGAL_SEQ)
369 return false;
370 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
371 return false;
372 }
373
374 return true;
375}
376
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is the soft hyphen, a directional mark, a
 *               directional isolate/embedding/override control, or the
 *               zero width no-break space
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
407
420{
421 if (!s || !*s)
422 return -1;
423
424 wchar_t wc = 0;
425 size_t k, k2;
426 char scratch[MB_LEN_MAX + 1];
427 char *p = *s;
428 mbstate_t mbstate1 = { 0 };
429 mbstate_t mbstate2 = { 0 };
430
431 struct Buffer buf = buf_make(0);
432 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
433 {
434 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
435 {
436 k = 1;
437 memset(&mbstate1, 0, sizeof(mbstate1));
438 wc = ReplacementChar;
439 }
440 if (!IsWPrint(wc))
441 wc = '?';
443 continue;
444 k2 = wcrtomb(scratch, wc, &mbstate2);
445 scratch[k2] = '\0';
446 buf_addstr(&buf, scratch);
447 }
448 FREE(s);
449 *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
450 return 0;
451}
struct Buffer buf_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:70
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition: buffer.c:194
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:238
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition: buffer.c:349
General purpose object for storing and parsing strings.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:339
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:419
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:236
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:43
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:81
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:385
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:290
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:215
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:353
int mutt_mb_width(const char *str, int col, bool indent)
Measure a string's display width (in screen columns)
Definition: mbyte.c:136
void buf_mb_wcstombs(struct Buffer *dest, const wchar_t *wstr, size_t wlen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:255
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:197
Multi-byte String manipulation functions.
#define IsWPrint(wc)
Definition: mbyte.h:41
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:45
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:63
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:58
Conversion between different character encodings.
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition: charset.h:105
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:103
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:568
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35