NeoMutt  2021-02-05-329-g9e03b7
Teaching an old dog new tricks
DOXYGEN
mbyte.c
Go to the documentation of this file.
1 
29 #include "config.h"
30 #include <ctype.h>
31 #include <limits.h>
32 #include <stdbool.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <wchar.h>
36 #include <wctype.h>
37 #include "mbyte.h"
38 #include "buffer.h"
39 #include "charset.h"
40 #include "memory.h"
41 #include "string2.h"
42 
/** (pseudo) set if the user has a valid locale definition */
43 bool OptLocales;
44 
/**
 * mutt_mb_charlen - Count the bytes in a (multibyte) character
 * @param[in]  s     String to be examined
 * @param[out] width If not NULL, receives wcwidth() of the decoded character
 * @retval num Bytes in the first (multibyte) character of the string
 * @retval 0   String is NULL or empty
 * @retval -1  The leading bytes are an invalid or incomplete sequence
 *
 * On a decoding failure mbrtowc() stores nothing, so the wide character is
 * pre-initialised to L'\0'; @a width is then set from that value instead of
 * from an indeterminate one.
 */
int mutt_mb_charlen(const char *s, int *width)
{
  if (!s || (*s == '\0'))
    return 0;

  wchar_t wc = L'\0';
  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));

  /* s is known to be non-NULL here, so plain strlen() is safe */
  const size_t k = mbrtowc(&wc, s, strlen(s), &mbstate);
  if (width)
    *width = wcwidth(wc);

  if ((k == (size_t) (-1)) || (k == (size_t) (-2)))
    return -1;
  return (int) k;
}
70 
/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer, in bytes
 * @retval true  Success, buf holds a NUL-terminated string of initials
 * @retval false name or buf is NULL, buflen is 0, the name contains an
 *               undecodable sequence, or the result does not fit
 *
 * The first (multibyte) character of every word is copied to buf.  Words are
 * separated by whitespace or hyphens, and single-byte punctuation at the
 * start of a word is skipped.
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer cannot even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * <ctype.h> functions need a value representable as unsigned char. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always keep one byte in reserve for the terminator */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    buflen -= clen;
    while (clen--)
      *buf++ = *name++;

    /* Skip to end-of-word */
    for (; *name; name += clen)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
128 
/**
 * mutt_mb_width - Measure a string's display width (in screen columns)
 * @param str     String to measure
 * @param col     Display column used to compute the width of a tab stop
 * @param display If true, a space following a newline is counted as a tab
 * @retval num Width of the string in screen columns (0 if str is NULL)
 *
 * The string is decoded one multibyte character at a time and the pointer is
 * advanced by the number of bytes each character occupies, so trailing bytes
 * of a multibyte character are not counted as extra columns.  Bytes that
 * cannot be decoded, and characters for which wcwidth() is negative, count as
 * one column each.
 */
int mutt_mb_width(const char *str, int col, bool display)
{
  if (!str)
    return 0;

  int total = 0;
  bool after_newline = false;
  size_t remaining = strlen(str);
  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));

  while (remaining > 0)
  {
    wchar_t wc = L'\0';
    int cells = 1;
    size_t consumed = mbrtowc(&wc, str, remaining, &mbstate);
    if (consumed == 0)
      break;

    if ((consumed == (size_t) (-1)) || (consumed == (size_t) (-2)))
    {
      /* Invalid or truncated sequence: count one column for this byte and
       * resynchronise on the next one */
      memset(&mbstate, 0, sizeof(mbstate));
      consumed = 1;
    }
    else
    {
      cells = wcwidth(wc);
      if (cells < 0)
        cells = 1;

      if ((wc == L'\t') || (after_newline && (wc == L' ')))
      {
        /* correctly calc tab stop, even for sending as the
         * line should look pretty on the receiving end */
        after_newline = false;
        cells = 8 - (col % 8);
      }
      else if (display && (wc == L'\n'))
      {
        /* track newlines for display-case: if we have a space
         * after a newline, assume 8 spaces as for display we
         * always tab-fold */
        after_newline = true;
      }
    }

    total += cells;
    str += consumed;
    remaining -= consumed;
  }

  return total;
}
171 
/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * When IsWPrint() accepts the character and wcwidth() reports a positive
 * width, that width is returned.  Otherwise a fixed width is chosen from the
 * size of the code point: 2 below 0x80, 6 up to 0xffff and 10 beyond that.
 * NOTE(review): these fixed widths presumably match the length of an escaped
 * representation produced elsewhere - confirm against the caller.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);

  if (IsWPrint(wc) && (cols > 0))
    return cols;

  if ((wc & ~0x7f) == 0)
    return 2;

  return ((wc & ~0xffff) == 0) ? 6 : 10;
}
188 
/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String of wide characters
 * @param n Number of characters to measure
 * @retval num Sum of mutt_mb_wcwidth() over the first n characters
 * @retval 0   s is NULL
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  int total = 0;

  if (s)
  {
    for (size_t idx = 0; idx < n; idx++)
      total += mutt_mb_wcwidth(s[idx]);
  }

  return total;
}
205 
/**
 * mutt_mb_width_ceiling - Count the leading characters that fit in a width
 * @param s  String of wide characters
 * @param n  Number of characters available in s
 * @param w1 Width limit, in screen columns
 * @retval num Index of the first character at which the accumulated
 *             mutt_mb_wcwidth() exceeds w1, or n if the limit is never
 *             exceeded
 * @retval 0   s is NULL
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int used = 0;

  while (count < n)
  {
    used += mutt_mb_wcwidth(s[count]);
    if (used > w1)
      break;
    count++;
  }

  return count;
}
228 
/**
 * mutt_mb_wcstombs - Convert a string from wide to multibyte characters
 * @param dest Buffer for the result
 * @param dlen Size of the result buffer, in bytes
 * @param src  Source wide characters
 * @param slen Number of wide characters in src
 *
 * Nothing is written when dest or src is NULL, or when dlen is 0.  Otherwise
 * dest is NUL-terminated; if the converted text does not fit it is truncated
 * at a byte boundary, which may split a multibyte character.
 */
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
{
  /* With no room at all, even the terminator would land outside the buffer */
  if (!dest || !src || (dlen == 0))
    return;

  mbstate_t st;
  size_t k;

  /* First convert directly into the destination buffer; this is safe while
   * at least MB_LEN_MAX bytes remain, the most one character can need */
  memset(&st, 0, sizeof(st));
  for (; slen && (dlen >= MB_LEN_MAX); dest += k, dlen -= k, src++, slen--)
  {
    k = wcrtomb(dest, *src, &st);
    if (k == (size_t) (-1))
      break;
  }

  /* If this works, we can stop now */
  if (dlen >= MB_LEN_MAX)
  {
    wcrtomb(dest, L'\0', &st);
    return;
  }

  /* Otherwise convert any remaining data into a local buffer.  Fewer than
   * MB_LEN_MAX bytes are left, so at most that many bytes, plus one more
   * character and the terminator, are produced here. */
  char buf[3 * MB_LEN_MAX];
  char *p = buf;

  for (; slen && ((size_t) (p - buf) < dlen); p += k, src++, slen--)
  {
    k = wcrtomb(p, *src, &st);
    if (k == (size_t) (-1))
      break;
  }
  p += wcrtomb(p, L'\0', &st);

  /* If it fits into the destination buffer, we can stop now */
  if ((size_t) (p - buf) <= dlen)
  {
    memcpy(dest, buf, p - buf);
    return;
  }

  /* Otherwise we truncate the string in an ugly fashion */
  memcpy(dest, buf, dlen);
  dest[dlen - 1] = '\0'; /* dlen > 0 is guaranteed by the guard above */
}
285 
294 size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
295 {
296  if (!pwbuf || !pwbuflen || !buf)
297  return 0;
298 
299  wchar_t wc;
300  mbstate_t st;
301  size_t k;
302  wchar_t *wbuf = *pwbuf;
303  size_t wbuflen = *pwbuflen;
304 
305  while (*buf != '\0')
306  {
307  memset(&st, 0, sizeof(st));
308  for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &st)) && k != (size_t) (-1) &&
309  k != (size_t) (-2);
310  buf += k)
311  {
312  if (i >= wbuflen)
313  {
314  wbuflen = i + 20;
315  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
316  }
317  wbuf[i++] = wc;
318  }
319  if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
320  {
321  if (i >= wbuflen)
322  {
323  wbuflen = i + 20;
324  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
325  }
326  wbuf[i++] = ReplacementChar;
327  buf++;
328  }
329  }
330  *pwbuf = wbuf;
331  *pwbuflen = wbuflen;
332  return i;
333 }
334 
/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is one of `<>&()$?*;{}|` or a space
 * @retval false Any other character, including L'\0'
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* ! not included because it can be part of a pathname in NeoMutt */
  static const wchar_t shell_chars[] = L"<>&()$?*;{}| ";

  /* wcschr() also matches the terminating null wide character, which would
   * wrongly classify L'\0' as a shell character */
  if (ch == L'\0')
    return false;

  return wcschr(shell_chars, ch) != NULL;
}
348 
/**
 * mutt_mb_is_lower - Does a multi-byte string contain only lowercase characters?
 * @param s String to check
 * @retval true  No character of the string is an uppercase letter
 * @retval false s is NULL, contains an uppercase letter, or contains an
 *               invalid multibyte sequence
 *
 * Non-alphabetic characters are ignored, so an empty string yields true.
 */
bool mutt_mb_is_lower(const char *s)
{
  if (!s)
    return false;

  wchar_t wc = L'\0';
  mbstate_t mbstate;
  memset(&mbstate, 0, sizeof(mbstate));

  for (;;)
  {
    const size_t max = MB_CUR_MAX;
    const size_t len = mbrtowc(&wc, s, max, &mbstate);

    if (len == 0)
      break; /* reached the terminating NUL */
    if (len == (size_t) -1)
      return false;
    if (len == (size_t) -2)
    {
      /* shift sequences: all `max` bytes were absorbed into the conversion
       * state without completing a character, so step over exactly those.
       * Adding the raw return value would move the pointer backwards. */
      s += max;
      continue;
    }

    if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
      return false;
    s += len;
  }

  return true;
}
380 
/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character is the soft hyphen, a directional mark, the zero
 *               width no-break space, or a bidirectional isolate, embedding
 *               or override control
 * @retval false Any other character
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
      (wc == (wchar_t) 0x200e) || /* left-to-right mark */
      (wc == (wchar_t) 0x200f) || /* right-to-left mark */
      (wc == (wchar_t) 0xfeff))   /* zero width no-break space */
  {
    return true;
  }

  /* left-to-right isolate, right-to-left isolate, first strong isolate,
   * pop directional isolate */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
   * left-to-right override, right-to-left override */
  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
    return true;

  return false;
}
411 
424 {
425  if (!s || !*s)
426  return -1;
427 
428  wchar_t wc;
429  size_t k, k2;
430  char scratch[MB_LEN_MAX + 1];
431  char *p = *s;
432  mbstate_t mbstate1, mbstate2;
433 
434  struct Buffer buf = mutt_buffer_make(0);
435  memset(&mbstate1, 0, sizeof(mbstate1));
436  memset(&mbstate2, 0, sizeof(mbstate2));
437  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
438  {
439  if ((k == (size_t) -1) || (k == (size_t) -2))
440  {
441  k = 1;
442  memset(&mbstate1, 0, sizeof(mbstate1));
443  wc = ReplacementChar;
444  }
445  if (!IsWPrint(wc))
446  wc = '?';
448  continue;
449  k2 = wcrtomb(scratch, wc, &mbstate2);
450  scratch[k2] = '\0';
451  mutt_buffer_addstr(&buf, scratch);
452  }
453  FREE(s);
454  *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
455  return 0;
456 }
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:389
Memory management wrappers.
#define IsWPrint(wc)
Definition: mbyte.h:39
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
String manipulation buffer.
Definition: buffer.h:33
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
Keep the end of the string on-screen.
Definition: mbyte.c:216
Multi-byte String manipulation functions.
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
int mutt_mb_width(const char *str, int col, bool display)
Measure a string's display width (in screen columns)
Definition: mbyte.c:138
String manipulation functions.
void mutt_mb_wcstombs(char *dest, size_t dlen, const wchar_t *src, size_t slen)
Convert a string from wide to multibyte characters.
Definition: mbyte.c:236
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
Measure the screen width of a string.
Definition: mbyte.c:195
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
char * data
Pointer to data.
Definition: buffer.h:35
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
Turn a name into initials.
Definition: mbyte.c:83
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:57
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:62
bool mutt_mb_is_shell_char(wchar_t ch)
Is character not typically part of a pathname.
Definition: mbyte.c:343
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:631
General purpose object for storing and parsing strings.
#define FREE(x)
Definition: memory.h:40
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:177
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
size_t mutt_mb_mbstowcs(wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
Convert a string from multibyte to wide characters.
Definition: mbyte.c:294
bool OptLocales
(pseudo) set if user has valid locale definition
Definition: mbyte.c:43
Conversion between different character encodings.
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition: mbyte.c:357