NeoMutt  2020-11-20
Teaching an old dog new tricks
DOXYGEN
mbyte.c
Go to the documentation of this file.
1 
29 #include "config.h"
30 #include <stddef.h>
31 #include <ctype.h>
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <wchar.h>
37 #include <wctype.h>
38 #include "mbyte.h"
39 #include "buffer.h"
40 #include "charset.h"
41 #include "memory.h"
42 #include "string2.h"
43 
/// (pseudo) set if user has valid locale definition
bool OptLocales;
45 
/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to be examined
 * @param[out] width If not NULL, receives wcwidth() of the decoded character
 * @retval n  Number of bytes in the first (multibyte) character of s
 * @retval 0  s is NULL or empty (width is left untouched)
 * @retval -1 s starts with an invalid or incomplete multibyte sequence
 *
 * When decoding fails, width receives wcwidth(L'\0').
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (*s == '\0'))
    return 0;

  /* Start from L'\0': mbrtowc() stores nothing on failure, and wc is still
   * handed to wcwidth() below */
  wchar_t wc = L'\0';
  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));

  /* s is known to be non-NULL here */
  const size_t k = mbrtowc(&wc, s, strlen(s), &mbstate);
  if (width)
    *width = wcwidth(wc);

  if ((k == (size_t)(-1)) || (k == (size_t)(-2)))
    return -1;
  return (int) k;
}
71 
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds the NUL-terminated initials
 * @retval false name or buf is NULL, buflen is 0, name contains an invalid
 *               multibyte sequence, or buf is too small
 *
 * The first (multibyte) character of every word is copied to buf.  Words are
 * separated by whitespace or hyphens, and single-byte punctuation in front of
 * a word is skipped.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* buflen 0 leaves no room even for the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * <ctype.h> functions need an unsigned char value: a plain char may be
     * negative for bytes >= 0x80 */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Keep one byte spare for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= (size_t) clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
129 
/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure
 * @param col     Column used to work out the distance to the next tab stop
 * @param display If true, a space directly after a newline is measured like
 *                a tab (folded lines are tab-indented for display)
 * @retval num Width of the string in screen columns, 0 if str is NULL
 *
 * Every multibyte character is decoded once and the string is advanced by the
 * number of bytes it occupies.  A byte that cannot be decoded counts as one
 * column.  Characters for which wcwidth() reports a negative width also count
 * as one column.
 *
 * @note col is used as given for every tab; it is not advanced while the
 *       string is being measured.
 */
int mutt_mb_width(const char *str, int col, bool display)
{
  if (!str)
    return 0;

  int total = 0;
  bool after_newline = false;
  size_t remaining = strlen(str);
  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));

  while (remaining > 0)
  {
    wchar_t wc = L'\0';
    int cols = 1;
    size_t used = mbrtowc(&wc, str, remaining, &mbstate);
    if (used == 0)
      break;

    if ((used == (size_t)(-1)) || (used == (size_t)(-2)))
    {
      /* Undecodable: count the byte as one column, then resynchronise on
       * the next byte with a fresh conversion state */
      memset(&mbstate, 0, sizeof(mbstate));
      used = 1;
    }
    else
    {
      cols = wcwidth(wc);
      if (cols < 0)
        cols = 1;

      if ((wc == L'\t') || (after_newline && (wc == L' ')))
      {
        /* correctly calc tab stop, even for sending as the
         * line should look pretty on the receiving end */
        after_newline = false;
        cols = 8 - (col % 8);
      }
      else if (display && (wc == L'\n'))
      {
        /* track newlines for display-case: if we have a space
         * after a newline, assume 8 spaces as for display we
         * always tab-fold */
        after_newline = true;
      }
    }

    total += cols;
    str += used;
    remaining -= used;
  }

  return total;
}
172 
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A character that passes IsWPrint() and has a positive wcwidth() is reported
 * with that width.  Anything else gets a fixed width chosen by the size of
 * its code: 2 for 7-bit values, 6 for values that fit in 16 bits, 10
 * otherwise.
 *
 * NOTE(review): the fixed widths presumably match the lengths of the escaped
 * forms used when such characters are drawn -- confirm against the renderer.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);
  const bool printable = IsWPrint(wc) && (cols > 0);
  if (printable)
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}
189 
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Number of wide characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters of s,
 *             0 if s is NULL
 *
 * Exactly n characters are read; a L'\0' inside that range does not stop the
 * measurement.
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  int total = 0;

  if (s)
  {
    for (size_t idx = 0; idx < n; idx++)
      total += mutt_mb_wcwidth(s[idx]);
  }

  return total;
}
206 
/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of the string, in wide characters
 * @param w1 Width limit, in screen columns
 * @retval num Number of leading characters of s whose combined
 *             mutt_mb_wcwidth() does not exceed w1 (at most n), 0 if s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int width = 0;

  while (count < n)
  {
    width += mutt_mb_wcwidth(s[count]);
    if (width > w1)
      break; /* this character would cross the limit, so leave it out */
    count++;
  }

  return count;
}
229 
/**
 * mutt_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param dlen Length of the result buffer, in bytes
 * @param src  Source string to convert
 * @param slen Number of wide characters of src to convert
 *
 * Nothing is written if dest or src is NULL, or if dlen is 0.  Otherwise the
 * result is NUL-terminated.  Conversion stops early at the first wide
 * character that cannot be represented.  If the result does not fit, it is
 * cut at a character boundary so that dest never ends in a partial multibyte
 * sequence.
 */
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
{
  /* With dlen 0 there is no room for even the terminating NUL */
  if (!dest || !src || (dlen == 0))
    return;

  mbstate_t st;
  size_t k = 0;
  memset(&st, 0, sizeof(st));

  /* First convert directly into the destination buffer: this is safe for as
   * long as a worst-case character (MB_LEN_MAX bytes) still fits */
  while (slen && (dlen >= MB_LEN_MAX))
  {
    k = wcrtomb(dest, *src, &st);
    if (k == (size_t)(-1))
      break;
    dest += k;
    dlen -= k;
    src++;
    slen--;
  }

  /* If this works, we can stop now */
  if (dlen >= MB_LEN_MAX)
  {
    if (wcrtomb(dest, L'\0', &st) == (size_t)(-1))
      *dest = '\0';
    return;
  }

  /* Otherwise convert any remaining data into a local buffer.  At most
   * (dlen - 1) + MB_LEN_MAX bytes of text plus up to MB_LEN_MAX bytes for the
   * terminator are produced, which stays below 3 * MB_LEN_MAX */
  char buf[3 * MB_LEN_MAX];
  size_t used = 0;       /* bytes produced so far */
  size_t last_start = 0; /* offset at which the most recent character begins */

  while (slen && (used < dlen))
  {
    k = wcrtomb(buf + used, *src, &st);
    if (k == (size_t)(-1))
      break;
    last_start = used;
    used += k;
    src++;
    slen--;
  }

  const size_t body = used; /* length of the text without its terminator */
  k = wcrtomb(buf + used, L'\0', &st);
  if (k == (size_t)(-1))
  {
    buf[used] = '\0';
    k = 1;
  }
  used += k;

  /* If it fits into the destination buffer, we can stop now */
  if (used <= dlen)
  {
    memcpy(dest, buf, used);
    return;
  }

  /* Otherwise truncate, dropping the last character if it is the one that
   * crosses the end of the buffer */
  const size_t keep = (body < dlen) ? body : last_start;
  memcpy(dest, buf, keep);
  dest[keep] = '\0';
}
286 
295 size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, char *buf)
296 {
297  if (!pwbuf || !pwbuflen || !buf)
298  return 0;
299 
300  wchar_t wc;
301  mbstate_t st;
302  size_t k;
303  wchar_t *wbuf = *pwbuf;
304  size_t wbuflen = *pwbuflen;
305 
306  while (*buf != '\0')
307  {
308  memset(&st, 0, sizeof(st));
309  for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &st)) && k != (size_t)(-1) &&
310  k != (size_t)(-2);
311  buf += k)
312  {
313  if (i >= wbuflen)
314  {
315  wbuflen = i + 20;
316  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
317  }
318  wbuf[i++] = wc;
319  }
320  if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
321  {
322  if (i >= wbuflen)
323  {
324  wbuflen = i + 20;
325  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
326  }
327  wbuf[i++] = ReplacementChar;
328  buf++;
329  }
330  }
331  *pwbuf = wbuf;
332  *pwbuflen = wbuflen;
333  return i;
334 }
335 
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of `<>&()$?*;{}|` or a space
 * @retval false Character is anything else
 *
 * @note wcschr() treats the terminating null as part of the string it
 *       searches, so L'\0' is reported as true as well.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* '!' is deliberately absent: it can be part of a pathname in NeoMutt */
  static const wchar_t specials[] = L"<>&()$?*;{}| ";

  return wcschr(specials, ch) != NULL;
}
349 
/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  No character of s is an uppercase letter (also for "")
 * @retval false s is NULL, contains an uppercase letter, or contains an
 *               invalid or truncated multibyte sequence
 *
 * Non-alphabetic characters are acceptable; only uppercase letters make the
 * test fail.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));
  size_t remaining = strlen(s);

  while (remaining > 0)
  {
    wchar_t wc = L'\0';
    const size_t len = mbrtowc(&wc, s, remaining, &mbstate);
    if (len == 0)
      break;

    /* (size_t) -1: invalid sequence.  (size_t) -2: the string ends in the
     * middle of a character.  Neither value is a byte count, so the pointer
     * must not be advanced by it. */
    if ((len == (size_t)(-1)) || (len == (size_t)(-2)))
      return false;

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false;

    s += len;
    remaining -= len;
  }

  return true;
}
381 
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is a soft hyphen, a directional mark, a zero width
 *               no-break space, or a directional isolate / embedding /
 *               override control
 * @retval false Character is anything else
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
412 
425 {
426  if (!s || !*s)
427  return -1;
428 
429  wchar_t wc;
430  size_t k, k2;
431  char scratch[MB_LEN_MAX + 1];
432  char *p = *s;
433  mbstate_t mbstate1, mbstate2;
434 
435  struct Buffer buf = mutt_buffer_make(0);
436  memset(&mbstate1, 0, sizeof(mbstate1));
437  memset(&mbstate2, 0, sizeof(mbstate2));
438  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
439  {
440  if ((k == (size_t) -1) || (k == (size_t) -2))
441  {
442  k = 1;
443  memset(&mbstate1, 0, sizeof(mbstate1));
444  wc = ReplacementChar;
445  }
446  if (!IsWPrint(wc))
447  wc = '?';
449  continue;
450  k2 = wcrtomb(scratch, wc, &mbstate2);
451  scratch[k2] = '\0';
452  mutt_buffer_addstr(&buf, scratch);
453  }
454  FREE(s);
455  *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
456  return 0;
457 }
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:390
Memory management wrappers.
#define IsWPrint(wc)
Definition: mbyte.h:40
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:217
Multi-byte String manipulation functions.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:55
int mutt_mb_width(const char *str, int col, bool display)
Measure a string's display width (in screen columns)
Definition: mbyte.c:139
String manipulation functions.
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:237
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:196
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * data
Pointer to data.
Definition: buffer.h:35
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:84
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:58
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:63
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:344
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:295
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:631
General purpose object for storing and parsing strings.
#define FREE(x)
Definition: memory.h:40
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:178
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:424
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:44
Conversion between different character encodings.
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:358