NeoMutt  2021-10-29-225-gb9986f
Teaching an old dog new tricks
DOXYGEN
mbyte.c
Go to the documentation of this file.
1 
29 #include "config.h"
30 #include <ctype.h>
31 #include <limits.h>
32 #include <stdbool.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <wchar.h>
36 #include <wctype.h>
37 #include "mbyte.h"
38 #include "buffer.h"
39 #include "charset.h"
40 #include "memory.h"
41 #include "string2.h"
42 
43 bool OptLocales; ///< (pseudo) set if user has valid locale definition
44 
/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to be examined
 * @param[out] width If not NULL, receives wcwidth() of the decoded character
 * @retval  0 s is NULL or empty
 * @retval -1 mbrtowc() reported an invalid or incomplete sequence
 * @retval >0 Number of bytes making up the first character of s
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || !*s)
    return 0;

  wchar_t decoded = 0;
  mbstate_t state = { 0 };

  /* Let mbrtowc() look at the whole remaining string, but decode one char */
  const size_t avail = mutt_str_len(s);
  const size_t used = mbrtowc(&decoded, s, avail, &state);

  if (width)
    *width = wcwidth(decoded);

  if ((used == (size_t) (-1)) || (used == (size_t) (-2)))
    return -1;

  return used;
}
69 
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false Error: NULL argument, zero-sized buffer, undecodable
 *               character, or the initials don't fit into buf
 *
 * The first (multibyte) character of every word is copied to buf.
 * Words are separated by whitespace or hyphens; single-byte punctuation
 * at the start of a word is ignored.
 *
 * @note buf is only NUL-terminated when the function returns true.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer can't even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * The <ctype.h> functions require a value representable as unsigned char */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always keep one byte spare for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
127 
/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure
 * @param col     Column used to work out the width of a tab stop
 * @param display If true, a space following a newline is counted like a tab
 * @retval num Width of the string in screen columns (0 if str is NULL)
 *
 * Each character is decoded with mbrtowc() and the string pointer is advanced
 * by the number of bytes that character occupies, so a multibyte character
 * is counted exactly once.  Bytes that can't be decoded count as one column
 * each.  Characters for which wcwidth() is negative count as one column.
 *
 * @note col is not updated while scanning; every tab stop is computed
 *       relative to the col value passed in.
 */
int mutt_mb_width(const char *str, int col, bool display)
{
  if (!str)
    return 0;

  int total = 0;
  bool after_newline = false;
  mbstate_t mbstate = { 0 };
  size_t remaining = strlen(str);

  while (remaining > 0)
  {
    wchar_t wc = 0;
    int w = 1;

    size_t consumed = mbrtowc(&wc, str, remaining, &mbstate);
    if (consumed == 0)
      break;

    if ((consumed == (size_t) (-1)) || (consumed == (size_t) (-2)))
    {
      /* Invalid or truncated sequence: the conversion state is unusable now,
       * so reset it, count the byte as one column and carry on */
      memset(&mbstate, 0, sizeof(mbstate));
      consumed = 1;
    }
    else
    {
      w = wcwidth(wc);
      if (w < 0)
        w = 1;

      if ((wc == L'\t') || (after_newline && (wc == L' ')))
      {
        /* correctly calc tab stop, even for sending as the
         * line should look pretty on the receiving end */
        after_newline = false;
        w = 8 - (col % 8);
      }
      else if (display && (wc == L'\n'))
      {
        /* track newlines for display-case: if we have a space
         * after a newline, assume 8 spaces as for display we
         * always tab-fold */
        after_newline = true;
      }
    }

    total += w;
    str += consumed;
    remaining -= consumed;
  }

  return total;
}
170 
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A character accepted by IsWPrint() that has a positive wcwidth() is
 * reported with that width.  Anything else gets a fixed width chosen by
 * magnitude: 2 if the value fits in 7 bits, 6 if it fits in 16 bits,
 * otherwise 10.
 *
 * NOTE(review): the fixed widths presumably match the length of an escaped
 * rendering of the character -- confirm against the display code.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);

  if (IsWPrint(wc) && (cols > 0))
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}
187 
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String of wide characters
 * @param n Number of characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters
 *             (0 if s is NULL)
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  int total = 0;

  if (s)
  {
    for (size_t idx = 0; idx < n; idx++)
      total += mutt_mb_wcwidth(s[idx]);
  }

  return total;
}
204 
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String of wide characters
 * @param n  Number of characters available in s
 * @param w1 Width limit, in screen columns
 * @retval num Number of leading characters whose combined
 *             mutt_mb_wcwidth() does not exceed w1 (0 if s is NULL)
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break; /* this character would cross the limit */
    count++;
  }

  return count;
}
227 
/**
 * mutt_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param dlen Length of the result buffer, in bytes
 * @param src  Source string of wide characters
 * @param slen Number of wide characters to convert
 *
 * Nothing is written if dest or src is NULL, or if dlen is 0.
 * Conversion stops at the first character wcrtomb() can't encode.
 * If the result doesn't fit, it is cut at dlen bytes and the last byte is
 * replaced by a NUL, which may split a multibyte character.
 */
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
{
  /* With no room at all, the truncation path would write before dest */
  if (!dest || !src || (dlen == 0))
    return;

  mbstate_t mbstate = { 0 };
  size_t k;

  /* First convert directly into the destination buffer; this is safe for as
   * long as a character of maximum length is guaranteed to fit */
  for (; slen && (dlen >= MB_LEN_MAX); dest += k, dlen -= k, src++, slen--)
  {
    k = wcrtomb(dest, *src, &mbstate);
    if (k == (size_t) (-1))
    {
      /* The conversion state is unspecified after an encoding error */
      memset(&mbstate, 0, sizeof(mbstate));
      break;
    }
  }

  /* If this works, we can stop now */
  if (dlen >= MB_LEN_MAX)
  {
    wcrtomb(dest, 0, &mbstate);
    return;
  }

  /* Otherwise convert any remaining data into a local buffer.
   * At most (dlen - 1) bytes precede the last character written, so one
   * character plus the terminating sequence always fit in 3 * MB_LEN_MAX */
  char buf[3 * MB_LEN_MAX];
  char *p = buf;

  for (; slen && ((size_t) (p - buf) < dlen); p += k, src++, slen--)
  {
    k = wcrtomb(p, *src, &mbstate);
    if (k == (size_t) (-1))
    {
      memset(&mbstate, 0, sizeof(mbstate));
      break;
    }
  }
  p += wcrtomb(p, 0, &mbstate);

  /* If it fits into the destination buffer, we can stop now */
  const size_t used = p - buf;
  if (used <= dlen)
  {
    memcpy(dest, buf, used);
    return;
  }

  /* Otherwise we truncate the string in an ugly fashion.
   * dlen may be 0 here if the direct conversion used up the buffer exactly;
   * dest has been advanced in that case, so dest[dlen - 1] is still inside
   * the caller's buffer. */
  memcpy(dest, buf, dlen);
  dest[dlen - 1] = '\0';
}
283 
292 size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
293 {
294  if (!pwbuf || !pwbuflen || !buf)
295  return 0;
296 
297  wchar_t wc = 0;
298  mbstate_t mbstate = { 0 };
299  size_t k;
300  wchar_t *wbuf = *pwbuf;
301  size_t wbuflen = *pwbuflen;
302 
303  while (*buf != '\0')
304  {
305  memset(&mbstate, 0, sizeof(mbstate));
306  for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
307  k != (size_t) (-1) && k != (size_t) (-2);
308  buf += k)
309  {
310  if (i >= wbuflen)
311  {
312  wbuflen = i + 20;
313  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
314  }
315  wbuf[i++] = wc;
316  }
317  if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
318  {
319  if (i >= wbuflen)
320  {
321  wbuflen = i + 20;
322  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
323  }
324  wbuf[i++] = ReplacementChar;
325  buf++;
326  }
327  }
328  *pwbuf = wbuf;
329  *pwbuflen = wbuflen;
330  return i;
331 }
332 
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of: < > & ( ) $ ? * ; { } | or space
 * @retval false Any other character, including the NUL character
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */

  /* wcschr() also matches the string's terminator, so rule out NUL first */
  return (ch != L'\0') && (wcschr(shell_chars, ch) != NULL);
}
346 
/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  String contains no uppercase alphabetic characters
 *               (this includes the empty string)
 * @retval false s is NULL, contains an uppercase alphabetic character, or
 *               contains an invalid or truncated multibyte sequence
 *
 * Non-alphabetic characters are ignored.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  mbstate_t mbstate = { 0 };
  size_t remaining = strlen(s);

  while (remaining > 0)
  {
    wchar_t wc = 0;

    /* Offering mbrtowc() everything that's left lets it consume any shift
     * sequences together with the character that follows them */
    const size_t len = mbrtowc(&wc, s, remaining, &mbstate);
    if (len == 0)
      break;

    /* (size_t) -1: invalid sequence; (size_t) -2: the string ends in the
     * middle of a character.  Neither value is a length to advance by. */
    if ((len == (size_t) -1) || (len == (size_t) -2))
      return false;

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false;

    s += len;
    remaining -= len;
  }

  return true;
}
378 
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is a soft hyphen, a zero width no-break space, or
 *               one of the bi-directional control characters listed below
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
409 
422 {
423  if (!s || !*s)
424  return -1;
425 
426  wchar_t wc = 0;
427  size_t k, k2;
428  char scratch[MB_LEN_MAX + 1];
429  char *p = *s;
430  mbstate_t mbstate1 = { 0 };
431  mbstate_t mbstate2 = { 0 };
432 
433  struct Buffer buf = mutt_buffer_make(0);
434  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
435  {
436  if ((k == (size_t) -1) || (k == (size_t) -2))
437  {
438  k = 1;
439  memset(&mbstate1, 0, sizeof(mbstate1));
440  wc = ReplacementChar;
441  }
442  if (!IsWPrint(wc))
443  wc = '?';
445  continue;
446  k2 = wcrtomb(scratch, wc, &mbstate2);
447  scratch[k2] = '\0';
448  mutt_buffer_addstr(&buf, scratch);
449  }
450  FREE(s);
451  *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
452  return 0;
453 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
General purpose object for storing and parsing strings.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:235
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:341
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:421
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:215
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:43
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:82
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:387
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:292
int mutt_mb_width(const char *str, int col, bool display)
Measure a string's display width (in screen columns)
Definition: mbyte.c:137
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:194
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:355
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:176
Multi-byte String manipulation functions.
#define IsWPrint(wc)
Definition: mbyte.h:39
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
Memory management wrappers.
#define FREE(x)
Definition: memory.h:40
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:62
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:57
Conversion between different character encodings.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:475
String manipulation functions.
String manipulation buffer.
Definition: buffer.h:34
char * data
Pointer to data.
Definition: buffer.h:35