NeoMutt  2023-05-17-56-ga67199
Teaching an old dog new tricks
DOXYGEN
mbyte.h File Reference

Multi-byte String manipulation functions. More...

#include "config.h"
#include <ctype.h>
#include <stdbool.h>
#include <wchar.h>
#include <wctype.h>
+ Include dependency graph for mbyte.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define IsPrint(ch)   (isprint((unsigned char) (ch)) || (OptLocales ? 0 : ((unsigned char) (ch) >= 0xa0)))
 
#define IsWPrint(wc)   (iswprint(wc) || (OptLocales ? 0 : (wc >= 0xa0)))
 

Functions

int mutt_mb_charlen (const char *s, int *width)
 Count the bytes in a (multibyte) character. More...
 
int mutt_mb_filter_unprintable (char **s)
 Replace unprintable characters. More...
 
bool mutt_mb_get_initials (const char *name, char *buf, size_t buflen)
 Turn a name into initials. More...
 
bool mutt_mb_is_display_corrupting_utf8 (wchar_t wc)
 Will this character corrupt the display? More...
 
bool mutt_mb_is_lower (const char *s)
 Does a multi-byte string contain only lowercase characters? More...
 
bool mutt_mb_is_shell_char (wchar_t ch)
 Is character not typically part of a pathname. More...
 
size_t mutt_mb_mbstowcs (wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
 Convert a string from multibyte to wide characters. More...
 
void mutt_mb_wcstombs (char *dest, size_t dlen, const wchar_t *src, size_t slen)
 Convert a string from wide to multibyte characters. More...
 
int mutt_mb_wcswidth (const wchar_t *s, size_t n)
 Measure the screen width of a string. More...
 
int mutt_mb_wcwidth (wchar_t wc)
 Measure the screen width of a character. More...
 
int mutt_mb_width (const char *str, int col, bool display)
 Measure a string's display width (in screen columns) More...
 
size_t mutt_mb_width_ceiling (const wchar_t *s, size_t n, int w1)
 Keep the end of the string on-screen. More...
 

Variables

bool OptLocales
 (pseudo) set if user has valid locale definition More...
 

Detailed Description

Multi-byte String manipulation functions.

Authors
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file mbyte.h.

Macro Definition Documentation

◆ IsPrint

#define IsPrint (   ch)    (isprint((unsigned char) (ch)) || (OptLocales ? 0 : ((unsigned char) (ch) >= 0xa0)))

Definition at line 38 of file mbyte.h.

◆ IsWPrint

#define IsWPrint (   wc)    (iswprint(wc) || (OptLocales ? 0 : (wc >= 0xa0)))

Definition at line 39 of file mbyte.h.

Function Documentation

◆ mutt_mb_charlen()

int mutt_mb_charlen ( const char *  s,
int *  width 
)

Count the bytes in a (multibyte) character.

Parameters
[in] s  String to be examined
[out] width  Number of screen columns the character would use
Return values
num  Number of bytes consumed by the first (multibyte) character of the input
<0  Conversion error
=0  End of input
>0  Length (bytes)

Definition at line 54 of file mbyte.c.

55{
56 if (!s || (*s == '\0'))
57 return 0;
58
59 wchar_t wc = 0;
60 mbstate_t mbstate = { 0 };
61 size_t k, n;
62
63 n = mutt_str_len(s);
64 k = mbrtowc(&wc, s, n, &mbstate);
65 if (width)
66 *width = wcwidth(wc);
67 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
68}
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition: charset.h:105
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:103
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:568
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_filter_unprintable()

int mutt_mb_filter_unprintable ( char **  s)

Replace unprintable characters.

Parameters
[in,out] s  String to modify
Return values
0  Success
-1  Error

Invalid multibyte sequences will be replaced with ReplacementChar, and unprintable characters will be replaced with '?'.

Note
The source string will be freed and a newly allocated string will be returned in its place. The caller should free the returned string.

Definition at line 423 of file mbyte.c.

424{
425 if (!s || !*s)
426 return -1;
427
428 wchar_t wc = 0;
429 size_t k, k2;
430 char scratch[MB_LEN_MAX + 1];
431 char *p = *s;
432 mbstate_t mbstate1 = { 0 };
433 mbstate_t mbstate2 = { 0 };
434
435 struct Buffer buf = buf_make(0);
436 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
437 {
438 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
439 {
440 k = 1;
441 memset(&mbstate1, 0, sizeof(mbstate1));
442 wc = ReplacementChar;
443 }
444 if (!IsWPrint(wc))
445 wc = '?';
446 else if (CharsetIsUtf8 && mutt_mb_is_display_corrupting_utf8(wc))
447 continue;
448 k2 = wcrtomb(scratch, wc, &mbstate2);
449 scratch[k2] = '\0';
450 buf_addstr(&buf, scratch);
451 }
452 FREE(s);
453 *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
454 return 0;
455}
struct Buffer buf_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:70
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:238
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:389
#define IsWPrint(wc)
Definition: mbyte.h:39
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:43
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:59
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:54
String manipulation buffer.
Definition: buffer.h:34
char * data
Pointer to data.
Definition: buffer.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_get_initials()

bool mutt_mb_get_initials ( const char *  name,
char *  buf,
size_t  buflen 
)

Turn a name into initials.

Parameters
name  String to be converted
buf  Buffer for the result
buflen  Size of the buffer
Return values
true  Success
false  Failure

Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK". The function saves the first character from each word. Words are delimited by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").

Definition at line 82 of file mbyte.c.

83{
84 if (!name || !buf)
85 return false;
86
87 while (*name)
88 {
89 /* Char's length in bytes */
90 int clen = mutt_mb_charlen(name, NULL);
91 if (clen < 1)
92 return false;
93
94 /* Ignore punctuation at the beginning of a word */
95 if ((clen == 1) && ispunct(*name))
96 {
97 name++;
98 continue;
99 }
100
101 if (clen >= buflen)
102 return false;
103
104 /* Copy one multibyte character */
105 buflen -= clen;
106 while (clen--)
107 *buf++ = *name++;
108
109 /* Skip to end-of-word */
110 for (; *name; name += clen)
111 {
112 clen = mutt_mb_charlen(name, NULL);
113 if (clen < 1)
114 return false;
115 if ((clen == 1) && (isspace(*name) || (*name == '-')))
116 break;
117 }
118
119 /* Skip any whitespace, or hyphens */
120 while (*name && (isspace(*name) || (*name == '-')))
121 name++;
122 }
123
124 *buf = '\0';
125 return true;
126}
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_is_display_corrupting_utf8()

bool mutt_mb_is_display_corrupting_utf8 ( wchar_t  wc)

Will this character corrupt the display?

Parameters
wc  Character to examine
Return values
true  Character would corrupt the display
false  Character is safe to display
Note
This list isn't complete.

Definition at line 389 of file mbyte.c.

390{
391 if ((wc == (wchar_t) 0x00ad) || /* soft hyphen */
392 (wc == (wchar_t) 0x200e) || /* left-to-right mark */
393 (wc == (wchar_t) 0x200f) || /* right-to-left mark */
394 (wc == (wchar_t) 0xfeff)) /* zero width no-break space */
395 {
396 return true;
397 }
398
399 /* left-to-right isolate, right-to-left isolate, first strong isolate,
400 * pop directional isolate */
401 if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
402 return true;
403
404 /* left-to-right embedding, right-to-left embedding, pop directional formatting,
405 * left-to-right override, right-to-left override */
406 if ((wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e))
407 return true;
408
409 return false;
410}
+ Here is the caller graph for this function:

◆ mutt_mb_is_lower()

bool mutt_mb_is_lower ( const char *  s)

Does a multi-byte string contain only lowercase characters?

Parameters
s  String to check
Return values
true  String contains no uppercase characters
false  Error, or contains some uppercase characters

Non-alphabetic characters are considered lowercase.

Definition at line 357 of file mbyte.c.

358{
359 if (!s)
360 return false;
361
362 wchar_t wc = 0;
363 mbstate_t mbstate = { 0 };
364 size_t l;
365
366 memset(&mbstate, 0, sizeof(mbstate));
367
368 for (; (l = mbrtowc(&wc, s, MB_CUR_MAX, &mbstate)) != 0; s += l)
369 {
370 if (l == ICONV_BUF_TOO_SMALL)
371 continue; /* shift sequences */
372 if (l == ICONV_ILLEGAL_SEQ)
373 return false;
374 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
375 return false;
376 }
377
378 return true;
379}
+ Here is the caller graph for this function:

◆ mutt_mb_is_shell_char()

bool mutt_mb_is_shell_char ( wchar_t  ch)

Is character not typically part of a pathname.

Parameters
ch  Character to examine
Return values
true  Character is not typically part of a pathname
false  Character is typically part of a pathname
Note
The name is very confusing.

Definition at line 343 of file mbyte.c.

344{
345 static const wchar_t shell_chars[] = L"<>&()$?*;{}| "; /* ! not included because it can be part of a pathname in NeoMutt */
346 return wcschr(shell_chars, ch);
347}
+ Here is the caller graph for this function:

◆ mutt_mb_mbstowcs()

size_t mutt_mb_mbstowcs ( wchar_t **  pwbuf,
size_t *  pwbuflen,
size_t  i,
const char *  buf 
)

Convert a string from multibyte to wide characters.

Parameters
[out] pwbuf  Buffer for the result
[out] pwbuflen  Length of the result buffer
[in] i  Starting index into the result buffer
[in] buf  String to convert
Return values
num  Index of the first character after the result

Definition at line 294 of file mbyte.c.

295{
296 if (!pwbuf || !pwbuflen || !buf)
297 return 0;
298
299 wchar_t wc = 0;
300 mbstate_t mbstate = { 0 };
301 size_t k;
302 wchar_t *wbuf = *pwbuf;
303 size_t wbuflen = *pwbuflen;
304
305 while (*buf != '\0')
306 {
307 memset(&mbstate, 0, sizeof(mbstate));
308 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
309 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
310 buf += k)
311 {
312 if (i >= wbuflen)
313 {
314 wbuflen = i + 20;
315 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
316 }
317 wbuf[i++] = wc;
318 }
319 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
320 {
321 if (i >= wbuflen)
322 {
323 wbuflen = i + 20;
324 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
325 }
326 wbuf[i++] = ReplacementChar;
327 buf++;
328 }
329 }
330 *pwbuf = wbuf;
331 *pwbuflen = wbuflen;
332 return i;
333}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcstombs()

void mutt_mb_wcstombs ( char *  dest,
size_t  dlen,
const wchar_t *  src,
size_t  slen 
)

Convert a string from wide to multibyte characters.

Parameters
dest  Buffer for the result
dlen  Length of the result buffer
src  Source string to convert
slen  Length of the source string

Definition at line 237 of file mbyte.c.

238{
239 if (!dest || !src)
240 return;
241
242 mbstate_t mbstate = { 0 };
243 size_t k;
244
245 /* First convert directly into the destination buffer */
246 for (; slen && (dlen >= MB_LEN_MAX); dest += k, dlen -= k, src++, slen--)
247 {
248 k = wcrtomb(dest, *src, &mbstate);
249 if (k == ICONV_ILLEGAL_SEQ)
250 break;
251 }
252
253 /* If this works, we can stop now */
254 if (dlen >= MB_LEN_MAX)
255 {
256 dest += wcrtomb(dest, 0, &mbstate);
257 return;
258 }
259
260 /* Otherwise convert any remaining data into a local buffer */
261 {
262 char buf[3 * MB_LEN_MAX];
263 char *p = buf;
264
265 for (; slen && p - buf < dlen; p += k, src++, slen--)
266 {
267 k = wcrtomb(p, *src, &mbstate);
268 if (k == ICONV_ILLEGAL_SEQ)
269 break;
270 }
271 p += wcrtomb(p, 0, &mbstate);
272
273 /* If it fits into the destination buffer, we can stop now */
274 if (p - buf <= dlen)
275 {
276 memcpy(dest, buf, p - buf);
277 return;
278 }
279
280 /* Otherwise we truncate the string in an ugly fashion */
281 memcpy(dest, buf, dlen);
282 dest[dlen - 1] = '\0'; /* assume original dlen > 0 */
283 }
284}
+ Here is the caller graph for this function:

◆ mutt_mb_wcswidth()

int mutt_mb_wcswidth ( const wchar_t *  s,
size_t  n 
)

Measure the screen width of a string.

Parameters
s  String to measure
n  Length of string in characters
Return values
num  Width in screen columns

Definition at line 196 of file mbyte.c.

197{
198 if (!s)
199 return 0;
200
201 int w = 0;
202 while (n--)
203 w += mutt_mb_wcwidth(*s++);
204 return w;
205}
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:178
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcwidth()

int mutt_mb_wcwidth ( wchar_t  wc)

Measure the screen width of a character.

Parameters
wc  Character to examine
Return values
num  Width in screen columns

Definition at line 178 of file mbyte.c.

179{
180 int n = wcwidth(wc);
181 if (IsWPrint(wc) && (n > 0))
182 return n;
183 if (!(wc & ~0x7f))
184 return 2;
185 if (!(wc & ~0xffff))
186 return 6;
187 return 10;
188}
+ Here is the caller graph for this function:

◆ mutt_mb_width()

int mutt_mb_width ( const char *  str,
int  col,
bool  display 
)

Measure a string's display width (in screen columns)

Parameters
str  String to measure
col  Display column (used for expanding tabs)
display  Will this be displayed to the user?
Return values
num  String's width in screen columns

This is like wcswidth(), but takes a multibyte string (const char*) rather than a wide-character string (const wchar_t*).

Definition at line 137 of file mbyte.c.

138{
139 wchar_t wc = 0;
140 int l, w = 0, nl = 0;
141 const char *p = str;
142
143 while (p && *p)
144 {
145 if (mbtowc(&wc, p, MB_CUR_MAX) >= 0)
146 {
147 l = wcwidth(wc);
148 if (l < 0)
149 l = 1;
150 /* correctly calc tab stop, even for sending as the
151 * line should look pretty on the receiving end */
152 if ((wc == L'\t') || (nl && (wc == L' ')))
153 {
154 nl = 0;
155 l = 8 - (col % 8);
156 }
157 /* track newlines for display-case: if we have a space
158 * after a newline, assume 8 spaces as for display we
159 * always tab-fold */
160 else if (display && (wc == '\n'))
161 nl = 1;
162 }
163 else
164 {
165 l = 1;
166 }
167 w += l;
168 p++;
169 }
170 return w;
171}
+ Here is the caller graph for this function:

◆ mutt_mb_width_ceiling()

size_t mutt_mb_width_ceiling ( const wchar_t *  s,
size_t  n,
int  w1 
)

Keep the end of the string on-screen.

Parameters
s  String being displayed
n  Length of string in characters
w1  Width limit
Return values
num  Chars to skip

Given a string and a width, determine how many characters from the beginning of the string should be skipped so that the string fits.

Definition at line 217 of file mbyte.c.

218{
219 if (!s)
220 return 0;
221
222 const wchar_t *s0 = s;
223 int w = 0;
224 for (; n; s++, n--)
225 if ((w += mutt_mb_wcwidth(*s)) > w1)
226 break;
227 return s - s0;
228}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ OptLocales

bool OptLocales
extern

(pseudo) set if user has valid locale definition

Definition at line 43 of file mbyte.c.