NeoMutt  2021-10-29-225-gb9986f
Teaching an old dog new tricks
DOXYGEN
mbyte.c File Reference

Multi-byte String manipulation functions. More...

#include "config.h"
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "mbyte.h"
#include "buffer.h"
#include "charset.h"
#include "memory.h"
#include "string2.h"
+ Include dependency graph for mbyte.c:

Go to the source code of this file.

Functions

int mutt_mb_charlen (const char *s, int *width)
 Count the bytes in a (multibyte) character. More...
 
bool mutt_mb_get_initials (const char *name, char *buf, size_t buflen)
 Turn a name into initials. More...
 
int mutt_mb_width (const char *str, int col, bool display)
 Measure a string's display width (in screen columns) More...
 
int mutt_mb_wcwidth (wchar_t wc)
 Measure the screen width of a character. More...
 
int mutt_mb_wcswidth (const wchar_t *s, size_t n)
 Measure the screen width of a string. More...
 
size_t mutt_mb_width_ceiling (const wchar_t *s, size_t n, int w1)
 Keep the end of the string on-screen. More...
 
void mutt_mb_wcstombs (char *dest, size_t dlen, const wchar_t *src, size_t slen)
 Convert a string from wide to multibyte characters. More...
 
size_t mutt_mb_mbstowcs (wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
 Convert a string from multibyte to wide characters. More...
 
bool mutt_mb_is_shell_char (wchar_t ch)
 Is character not typically part of a pathname. More...
 
bool mutt_mb_is_lower (const char *s)
 Does a multi-byte string contain only lowercase characters? More...
 
bool mutt_mb_is_display_corrupting_utf8 (wchar_t wc)
 Will this character corrupt the display? More...
 
int mutt_mb_filter_unprintable (char **s)
 Replace unprintable characters. More...
 

Variables

bool OptLocales
 (pseudo) set if user has valid locale definition More...
 

Detailed Description

Multi-byte String manipulation functions.

Authors
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file mbyte.c.

Function Documentation

◆ mutt_mb_charlen()

int mutt_mb_charlen ( const char *  s,
int *  width 
)

Count the bytes in a (multibyte) character.

Parameters
[in]   s      String to be examined
[out]  width  Number of screen columns the character would use
Return values
num  Number of bytes the first (multibyte) character of the input consumes
<0   Conversion error
=0   End of input
>0   Length (bytes)

Definition at line 54 of file mbyte.c.

55 {
56  if (!s || (*s == '\0'))
57  return 0;
58 
59  wchar_t wc = 0;
60  mbstate_t mbstate = { 0 };
61  size_t k, n;
62 
63  n = mutt_str_len(s);
64  k = mbrtowc(&wc, s, n, &mbstate);
65  if (width)
66  *width = wcwidth(wc);
67  return ((k == (size_t) (-1)) || (k == (size_t) (-2))) ? -1 : k;
68 }
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:475
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_get_initials()

bool mutt_mb_get_initials ( const char *  name,
char *  buf,
size_t  buflen 
)

Turn a name into initials.

Parameters
name    String to be converted
buf     Buffer for the result
buflen  Size of the buffer
Return values
true   Success
false  Failure

Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK". The function saves the first character from each word. Words are delimited by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").

Definition at line 82 of file mbyte.c.

83 {
84  if (!name || !buf)
85  return false;
86 
87  while (*name)
88  {
89  /* Char's length in bytes */
90  int clen = mutt_mb_charlen(name, NULL);
91  if (clen < 1)
92  return false;
93 
94  /* Ignore punctuation at the beginning of a word */
95  if ((clen == 1) && ispunct(*name))
96  {
97  name++;
98  continue;
99  }
100 
101  if (clen >= buflen)
102  return false;
103 
104  /* Copy one multibyte character */
105  buflen -= clen;
106  while (clen--)
107  *buf++ = *name++;
108 
109  /* Skip to end-of-word */
110  for (; *name; name += clen)
111  {
112  clen = mutt_mb_charlen(name, NULL);
113  if (clen < 1)
114  return false;
115  if ((clen == 1) && (isspace(*name) || (*name == '-')))
116  break;
117  }
118 
119  /* Skip any whitespace, or hyphens */
120  while (*name && (isspace(*name) || (*name == '-')))
121  name++;
122  }
123 
124  *buf = '\0';
125  return true;
126 }
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_width()

int mutt_mb_width ( const char *  str,
int  col,
bool  display 
)

Measure a string's display width (in screen columns)

Parameters
str      String to measure
col      Display column (used for expanding tabs)
display  Will this be displayed to the user?
Return values
num  String's width in screen columns

This is like wcswidth(), but takes a multibyte string (const char*) rather than a wide-character string (const wchar_t*).

Definition at line 137 of file mbyte.c.

138 {
139  wchar_t wc = 0;
140  int l, w = 0, nl = 0;
141  const char *p = str;
142 
143  while (p && *p)
144  {
145  if (mbtowc(&wc, p, MB_CUR_MAX) >= 0)
146  {
147  l = wcwidth(wc);
148  if (l < 0)
149  l = 1;
150  /* correctly calc tab stop, even for sending as the
151  * line should look pretty on the receiving end */
152  if ((wc == L'\t') || (nl && (wc == L' ')))
153  {
154  nl = 0;
155  l = 8 - (col % 8);
156  }
157  /* track newlines for display-case: if we have a space
158  * after a newline, assume 8 spaces as for display we
159  * always tab-fold */
160  else if (display && (wc == '\n'))
161  nl = 1;
162  }
163  else
164  l = 1;
165  w += l;
166  p++;
167  }
168  return w;
169 }
+ Here is the caller graph for this function:

◆ mutt_mb_wcwidth()

int mutt_mb_wcwidth ( wchar_t  wc)

Measure the screen width of a character.

Parameters
wc  Character to examine
Return values
num  Width in screen columns

Definition at line 176 of file mbyte.c.

177 {
178  int n = wcwidth(wc);
179  if (IsWPrint(wc) && (n > 0))
180  return n;
181  if (!(wc & ~0x7f))
182  return 2;
183  if (!(wc & ~0xffff))
184  return 6;
185  return 10;
186 }
#define IsWPrint(wc)
Definition: mbyte.h:39
+ Here is the caller graph for this function:

◆ mutt_mb_wcswidth()

int mutt_mb_wcswidth ( const wchar_t *  s,
size_t  n 
)

Measure the screen width of a string.

Parameters
s  String to measure
n  Length of string in characters
Return values
num  Width in screen columns

Definition at line 194 of file mbyte.c.

195 {
196  if (!s)
197  return 0;
198 
199  int w = 0;
200  while (n--)
201  w += mutt_mb_wcwidth(*s++);
202  return w;
203 }
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:176
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_width_ceiling()

size_t mutt_mb_width_ceiling ( const wchar_t *  s,
size_t  n,
int  w1 
)

Keep the end of the string on-screen.

Parameters
s   String being displayed
n   Length of string in characters
w1  Width limit
Return values
num  Chars to skip

Given a string and a width, determine how many characters from the beginning of the string should be skipped so that the string fits.

Definition at line 215 of file mbyte.c.

216 {
217  if (!s)
218  return 0;
219 
220  const wchar_t *s0 = s;
221  int w = 0;
222  for (; n; s++, n--)
223  if ((w += mutt_mb_wcwidth(*s)) > w1)
224  break;
225  return s - s0;
226 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcstombs()

void mutt_mb_wcstombs ( char *  dest,
size_t  dlen,
const wchar_t *  src,
size_t  slen 
)

Convert a string from wide to multibyte characters.

Parameters
dest  Buffer for the result
dlen  Length of the result buffer
src   Source string to convert
slen  Length of the source string

Definition at line 235 of file mbyte.c.

236 {
237  if (!dest || !src)
238  return;
239 
240  mbstate_t mbstate = { 0 };
241  size_t k;
242 
243  /* First convert directly into the destination buffer */
244  for (; slen && dlen >= MB_LEN_MAX; dest += k, dlen -= k, src++, slen--)
245  {
246  k = wcrtomb(dest, *src, &mbstate);
247  if (k == (size_t) (-1))
248  break;
249  }
250 
251  /* If this works, we can stop now */
252  if (dlen >= MB_LEN_MAX)
253  {
254  dest += wcrtomb(dest, 0, &mbstate);
255  return;
256  }
257 
258  /* Otherwise convert any remaining data into a local buffer */
259  {
260  char buf[3 * MB_LEN_MAX];
261  char *p = buf;
262 
263  for (; slen && p - buf < dlen; p += k, src++, slen--)
264  {
265  k = wcrtomb(p, *src, &mbstate);
266  if (k == (size_t) (-1))
267  break;
268  }
269  p += wcrtomb(p, 0, &mbstate);
270 
271  /* If it fits into the destination buffer, we can stop now */
272  if (p - buf <= dlen)
273  {
274  memcpy(dest, buf, p - buf);
275  return;
276  }
277 
278  /* Otherwise we truncate the string in an ugly fashion */
279  memcpy(dest, buf, dlen);
280  dest[dlen - 1] = '\0'; /* assume original dlen > 0 */
281  }
282 }
+ Here is the caller graph for this function:

◆ mutt_mb_mbstowcs()

size_t mutt_mb_mbstowcs ( wchar_t **  pwbuf,
size_t *  pwbuflen,
size_t  i,
const char *  buf 
)

Convert a string from multibyte to wide characters.

Parameters
[out]pwbufBuffer for the result
[out]pwbuflenLength of the result buffer
[in]iStarting index into the result buffer
[in]bufString to convert
Return values
numFirst character after the result

Definition at line 292 of file mbyte.c.

293 {
294  if (!pwbuf || !pwbuflen || !buf)
295  return 0;
296 
297  wchar_t wc = 0;
298  mbstate_t mbstate = { 0 };
299  size_t k;
300  wchar_t *wbuf = *pwbuf;
301  size_t wbuflen = *pwbuflen;
302 
303  while (*buf != '\0')
304  {
305  memset(&mbstate, 0, sizeof(mbstate));
306  for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
307  k != (size_t) (-1) && k != (size_t) (-2);
308  buf += k)
309  {
310  if (i >= wbuflen)
311  {
312  wbuflen = i + 20;
313  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
314  }
315  wbuf[i++] = wc;
316  }
317  if ((*buf != '\0') && ((k == (size_t) -1) || (k == (size_t) -2)))
318  {
319  if (i >= wbuflen)
320  {
321  wbuflen = i + 20;
322  mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
323  }
324  wbuf[i++] = ReplacementChar;
325  buf++;
326  }
327  }
328  *pwbuf = wbuf;
329  *pwbuflen = wbuflen;
330  return i;
331 }
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:57
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_is_shell_char()

bool mutt_mb_is_shell_char ( wchar_t  ch)

Is character not typically part of a pathname.

Parameters
ch  Character to examine
Return values
true   Character is not typically part of a pathname
false  Character is typically part of a pathname
Note
The name is very confusing.

Definition at line 341 of file mbyte.c.

342 {
343  static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */
344  return wcschr(shell_chars, ch);
345 }
+ Here is the caller graph for this function:

◆ mutt_mb_is_lower()

bool mutt_mb_is_lower ( const char *  s)

Does a multi-byte string contain only lowercase characters?

Parameters
s  String to check
Return values
true   String contains no uppercase characters
false  Error, or contains some uppercase characters

Non-alphabetic characters are considered lowercase.

Definition at line 355 of file mbyte.c.

356 {
357  if (!s)
358  return false;
359 
360  wchar_t wc = 0;
361  mbstate_t mbstate = { 0 };
362  size_t l;
363 
364  memset(&mbstate, 0, sizeof(mbstate));
365 
366  for (; (l = mbrtowc(&wc, s, MB_CUR_MAX, &mbstate)) != 0; s += l)
367  {
368  if (l == (size_t) -2)
369  continue; /* shift sequences */
370  if (l == (size_t) -1)
371  return false;
372  if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
373  return false;
374  }
375 
376  return true;
377 }
+ Here is the caller graph for this function:

◆ mutt_mb_is_display_corrupting_utf8()

bool mutt_mb_is_display_corrupting_utf8 ( wchar_t  wc)

Will this character corrupt the display?

Parameters
wc  Character to examine
Return values
true   Character would corrupt the display
false  Character is safe to display
Note
This list isn't complete.

Definition at line 387 of file mbyte.c.

388 {
389  if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
390  (wc == (wchar_t) 0x200e) || /* left-to-right mark */
391  (wc == (wchar_t) 0x200f) || /* right-to-left mark */
392  (wc == (wchar_t) 0xfeff)) /* zero width no-break space */
393  {
394  return true;
395  }
396 
397  /* left-to-right isolate, right-to-left isolate, first strong isolate,
398  * pop directional isolate */
399  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
400  return true;
401 
402  /* left-to-right embedding, right-to-left embedding, pop directional formatting,
403  * left-to-right override, right-to-left override */
404  if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
405  return true;
406 
407  return false;
408 }
+ Here is the caller graph for this function:

◆ mutt_mb_filter_unprintable()

int mutt_mb_filter_unprintable ( char **  s)

Replace unprintable characters.

Parameters
[in,out]  s  String to modify
Return values
0   Success
-1  Error

Unprintable characters will be replaced with ReplacementChar.

Note
The source string will be freed and a newly allocated string will be returned in its place. The caller should free the returned string.

Definition at line 421 of file mbyte.c.

422 {
423  if (!s || !*s)
424  return -1;
425 
426  wchar_t wc = 0;
427  size_t k, k2;
428  char scratch[MB_LEN_MAX + 1];
429  char *p = *s;
430  mbstate_t mbstate1 = { 0 };
431  mbstate_t mbstate2 = { 0 };
432 
433  struct Buffer buf = mutt_buffer_make(0);
434  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
435  {
436  if ((k == (size_t) -1) || (k == (size_t) -2))
437  {
438  k = 1;
439  memset(&mbstate1, 0, sizeof(mbstate1));
440  wc = ReplacementChar;
441  }
442  if (!IsWPrint(wc))
443  wc = '?';
444  else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
445  continue;
446  k2 = wcrtomb(scratch, wc, &mbstate2);
447  scratch[k2] = '\0';
448  mutt_buffer_addstr(&buf, scratch);
449  }
450  FREE(s);
451  *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
452  return 0;
453 }
struct Buffer mutt_buffer_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:61
size_t mutt_buffer_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:225
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:387
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:40
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:62
String manipulation buffer.
Definition: buffer.h:34
char * data
Pointer to data.
Definition: buffer.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ OptLocales

bool OptLocales

(pseudo) set if user has valid locale definition

Definition at line 43 of file mbyte.c.