NeoMutt
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
mbyte.c File Reference

Multi-byte String manipulation functions. More...

#include "config.h"
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "mbyte.h"
#include "buffer.h"
#include "charset.h"
#include "memory.h"
#include "string2.h"
+ Include dependency graph for mbyte.c:

Go to the source code of this file.

Functions

int mutt_mb_charlen (const char *s, int *width)
 Count the bytes in a (multibyte) character.
 
bool mutt_mb_get_initials (const char *name, char *buf, size_t buflen)
 Turn a name into initials.
 
int mutt_mb_width (const char *str, int col, bool indent)
 Measure a string's display width (in screen columns)
 
int mutt_mb_wcwidth (wchar_t wc)
 Measure the screen width of a character.
 
int mutt_mb_wcswidth (const wchar_t *s, size_t n)
 Measure the screen width of a string.
 
size_t mutt_mb_width_ceiling (const wchar_t *s, size_t n, int w1)
 Keep the end of the string on-screen.
 
void buf_mb_wcstombs (struct Buffer *dest, const wchar_t *wstr, size_t wlen)
 Convert a string from wide to multibyte characters.
 
size_t mutt_mb_mbstowcs (wchar_t **pwbuf, size_t *pwbuflen, size_t i, const char *buf)
 Convert a string from multibyte to wide characters.
 
bool mutt_mb_is_shell_char (wchar_t ch)
 Is character not typically part of a pathname.
 
bool mutt_mb_is_lower (const char *s)
 Does a multi-byte string contain only lowercase characters?
 
bool mutt_mb_is_display_corrupting_utf8 (wchar_t wc)
 Will this character corrupt the display?
 
int mutt_mb_filter_unprintable (char **s)
 Replace unprintable characters.
 

Variables

bool OptLocales
 (pseudo) set if user has valid locale definition
 

Detailed Description

Multi-byte String manipulation functions.

Authors
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file mbyte.c.

Function Documentation

◆ mutt_mb_charlen()

int mutt_mb_charlen ( const char *  s,
int *  width 
)

Count the bytes in a (multibyte) character.

Parameters
[in]sString to be examined
[out]widthNumber of screen columns the character would use
Return values
numNumber of bytes consumed by the first (multibyte) character of the input
<0Conversion error
=0End of input
>0Length (bytes)

Definition at line 54 of file mbyte.c.

55{
56 if (!s || (*s == '\0'))
57 return 0;
58
59 wchar_t wc = 0;
60 mbstate_t mbstate = { 0 };
61
62 size_t n = mutt_str_len(s);
63 size_t k = mbrtowc(&wc, s, n, &mbstate);
64 if (width)
65 *width = wcwidth(wc);
66 return ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)) ? -1 : k;
67}
#define ICONV_BUF_TOO_SMALL
Error value for iconv() - Buffer too small.
Definition: charset.h:105
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:103
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:568
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_get_initials()

bool mutt_mb_get_initials ( const char *  name,
char *  buf,
size_t  buflen 
)

Turn a name into initials.

Parameters
nameString to be converted
bufBuffer for the result
buflenSize of the buffer
Return values
trueSuccess
falseFailure

Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK". The function saves the first character from each word. Words are delimited by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").

Definition at line 81 of file mbyte.c.

/**
 * mutt_mb_get_initials - Turn a name into initials
 * @param name   String to be converted
 * @param buf    Buffer for the result
 * @param buflen Size of the buffer
 * @retval true  Success
 * @retval false Failure (NULL argument, empty buffer, conversion error,
 *               or the initials don't fit in the buffer)
 *
 * Take a name, e.g. "John F. Kennedy" and reduce it to initials "JFK".
 * The function saves the first character from each word.  Words are delimited
 * by whitespace, or hyphens (so "Jean-Pierre" becomes "JP").
 */
bool mutt_mb_get_initials(const char *name, char *buf, size_t buflen)
{
  /* A zero-sized buffer can't even hold the terminating NUL */
  if (!name || !buf || (buflen == 0))
    return false;

  while (*name)
  {
    /* Char's length in bytes */
    int clen = mutt_mb_charlen(name, NULL);
    if (clen < 1)
      return false;

    /* Ignore punctuation at the beginning of a word.
     * <ctype.h> functions need a value representable as unsigned char. */
    if ((clen == 1) && ispunct((unsigned char) *name))
    {
      name++;
      continue;
    }

    /* Always keep one byte spare for the terminating NUL */
    if ((size_t) clen >= buflen)
      return false;

    /* Copy one multibyte character */
    memcpy(buf, name, clen);
    buf += clen;
    name += clen;
    buflen -= clen;

    /* Skip to end-of-word */
    while (*name)
    {
      clen = mutt_mb_charlen(name, NULL);
      if (clen < 1)
        return false;
      if ((clen == 1) && (isspace((unsigned char) *name) || (*name == '-')))
        break;
      name += clen;
    }

    /* Skip any whitespace, or hyphens */
    while (*name && (isspace((unsigned char) *name) || (*name == '-')))
      name++;
  }

  *buf = '\0';
  return true;
}
int mutt_mb_charlen(const char *s, int *width)
Count the bytes in a (multibyte) character.
Definition: mbyte.c:54
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_width()

int mutt_mb_width ( const char *  str,
int  col,
bool  indent 
)

Measure a string's display width (in screen columns)

Parameters
strString to measure
colDisplay column (used for expanding tabs)
indentIf true, newline-space will be indented 8 chars
Return values
numString's width in screen columns

This is like wcswidth(), but takes a const char* (multibyte string) instead of a const wchar_t*.

Definition at line 136 of file mbyte.c.

137{
138 if (!str || !*str)
139 return 0;
140
141 bool nl = false;
142 int total_width = 0;
143 mbstate_t mbstate = { 0 };
144
145 size_t str_len = mutt_str_len(str);
146
147 while (*str && (str_len > 0))
148 {
149 wchar_t wc = L'\0';
150 size_t consumed = mbrtowc(&wc, str, str_len, &mbstate);
151 if (consumed == 0)
152 break;
153
154 if (consumed == ICONV_ILLEGAL_SEQ)
155 {
156 memset(&mbstate, 0, sizeof(mbstate));
157 wc = ReplacementChar;
158 consumed = 1;
159 }
160 else if (consumed == ICONV_BUF_TOO_SMALL)
161 {
162 wc = ReplacementChar;
163 consumed = str_len;
164 }
165
166 int wchar_width = wcwidth(wc);
167 if (wchar_width < 0)
168 wchar_width = 1;
169
170 if ((wc == L'\t') || (nl && (wc == L' ')))
171 {
172 /* correctly calc tab stop, even for sending as the line should look
173 * pretty on the receiving end */
174 nl = false;
175 wchar_width = 8 - (col % 8);
176 }
177 else if (indent && (wc == '\n'))
178 {
179 /* track newlines for display-case: if we have a space after a newline,
180 * assume 8 spaces as for display we always tab-fold */
181 nl = true;
182 }
183
184 total_width += wchar_width;
185 str += consumed;
186 str_len -= consumed;
187 }
188
189 return total_width;
190}
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition: charset.c:58
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_wcwidth()

int mutt_mb_wcwidth ( wchar_t  wc)

Measure the screen width of a character.

Parameters
wcCharacter to examine
Return values
numWidth in screen columns

Definition at line 197 of file mbyte.c.

/**
 * mutt_mb_wcwidth - Measure the screen width of a character
 * @param wc Character to examine
 * @retval num Width in screen columns
 *
 * A printable character with a positive wcwidth() reports that width.
 * Anything else gets a fixed width picked by the size of its code point.
 */
int mutt_mb_wcwidth(wchar_t wc)
{
  const int cols = wcwidth(wc);
  if (IsWPrint(wc) && (cols > 0))
    return cols;

  /* NOTE(review): 2/6/10 presumably match the lengths of the escaped forms
   * used when such characters are drawn -- confirm against the display code */
  if ((wc & ~0x7f) == 0)
    return 2; /* fits in 7 bits */

  return ((wc & ~0xffff) == 0) ? 6 : 10; /* fits in 16 bits, or larger */
}
#define IsWPrint(wc)
Definition: mbyte.h:41
+ Here is the caller graph for this function:

◆ mutt_mb_wcswidth()

int mutt_mb_wcswidth ( const wchar_t *  s,
size_t  n 
)

Measure the screen width of a string.

Parameters
sString to measure
nLength of string in characters
Return values
numWidth in screen columns

Definition at line 215 of file mbyte.c.

/**
 * mutt_mb_wcswidth - Measure the screen width of a string
 * @param s String to measure
 * @param n Length of string in characters
 * @retval num Width in screen columns (0 if s is NULL)
 *
 * Exactly n characters are measured; a L'\0' does not end the scan early.
 */
int mutt_mb_wcswidth(const wchar_t *s, size_t n)
{
  if (!s)
    return 0;

  int total = 0;
  for (size_t idx = 0; idx < n; idx++)
    total += mutt_mb_wcwidth(s[idx]);

  return total;
}
int mutt_mb_wcwidth(wchar_t wc)
Measure the screen width of a character.
Definition: mbyte.c:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_width_ceiling()

size_t mutt_mb_width_ceiling ( const wchar_t *  s,
size_t  n,
int  w1 
)

Keep the end of the string on-screen.

Parameters
sString being displayed
nLength of string in characters
w1Width limit
Return values
numChars to skip

Given a string and a width, determine how many characters from the beginning of the string should be skipped so that the string fits.

Definition at line 236 of file mbyte.c.

/**
 * mutt_mb_width_ceiling - Keep the end of the string on-screen
 * @param s  String being displayed
 * @param n  Length of string in characters
 * @param w1 Width limit
 * @retval num Chars to skip
 *
 * Walk the string from its start, adding up display widths, and stop at the
 * first character that pushes the running total past w1.  The number of
 * characters before that one is returned (n if the limit is never exceeded).
 */
size_t mutt_mb_width_ceiling(const wchar_t *s, size_t n, int w1)
{
  if (!s)
    return 0;

  size_t count = 0;
  int width = 0;

  while (count < n)
  {
    width += mutt_mb_wcwidth(s[count]);
    if (width > w1)
      break;
    count++;
  }

  return count;
}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ buf_mb_wcstombs()

void buf_mb_wcstombs ( struct Buffer dest,
const wchar_t *  wstr,
size_t  wlen 
)

Convert a string from wide to multibyte characters.

Parameters
destBuffer for the result
wstrSource wide string to convert
wlenLength of the wide string

Definition at line 255 of file mbyte.c.

256{
257 if (!dest || !wstr)
258 return;
259
260 // Give ourselves 4 utf-8 bytes per wide character
261 buf_alloc(dest, 4 * wlen);
262
263 mbstate_t mbstate = { 0 };
264 size_t k = 0;
265
266 char *buf = dest->data;
267 size_t buflen = dest->dsize;
268
269 for (; (wlen > 0) && (buflen >= MB_LEN_MAX); buf += k, buflen -= k, wstr++, wlen--)
270 {
271 k = wcrtomb(buf, *wstr, &mbstate);
272 if (k == ICONV_ILLEGAL_SEQ)
273 break;
274 if (*wstr == L'\0')
275 break;
276 }
277
278 *buf = '\0';
279 buf_fix_dptr(dest);
280}
void buf_fix_dptr(struct Buffer *buf)
Move the dptr to end of the Buffer.
Definition: buffer.c:194
void buf_alloc(struct Buffer *buf, size_t new_size)
Make sure a buffer can store at least new_size bytes.
Definition: buffer.c:349
size_t dsize
Length of data.
Definition: buffer.h:37
char * data
Pointer to data.
Definition: buffer.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_mbstowcs()

size_t mutt_mb_mbstowcs ( wchar_t **  pwbuf,
size_t *  pwbuflen,
size_t  i,
const char *  buf 
)

Convert a string from multibyte to wide characters.

Parameters
[out]pwbufBuffer for the result
[out]pwbuflenLength of the result buffer
[in]iStarting index into the result buffer
[in]bufString to convert
Return values
numFirst character after the result

Definition at line 290 of file mbyte.c.

291{
292 if (!pwbuf || !pwbuflen || !buf)
293 return 0;
294
295 wchar_t wc = 0;
296 mbstate_t mbstate = { 0 };
297 size_t k;
298 wchar_t *wbuf = *pwbuf;
299 size_t wbuflen = *pwbuflen;
300
301 while (*buf != '\0')
302 {
303 memset(&mbstate, 0, sizeof(mbstate));
304 for (; (k = mbrtowc(&wc, buf, MB_LEN_MAX, &mbstate)) &&
305 (k != ICONV_ILLEGAL_SEQ) && (k != ICONV_BUF_TOO_SMALL);
306 buf += k)
307 {
308 if (i >= wbuflen)
309 {
310 wbuflen = i + 20;
311 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
312 }
313 wbuf[i++] = wc;
314 }
315 if ((*buf != '\0') && ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL)))
316 {
317 if (i >= wbuflen)
318 {
319 wbuflen = i + 20;
320 mutt_mem_realloc(&wbuf, wbuflen * sizeof(*wbuf));
321 }
322 wbuf[i++] = ReplacementChar;
323 buf++;
324 }
325 }
326 *pwbuf = wbuf;
327 *pwbuflen = wbuflen;
328 return i;
329}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:114
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_mb_is_shell_char()

bool mutt_mb_is_shell_char ( wchar_t  ch)

Is character not typically part of a pathname.

Parameters
chCharacter to examine
Return values
trueCharacter is not typically part of a pathname
falseCharacter is typically part of a pathname
Note
The name is very confusing.

Definition at line 339 of file mbyte.c.

/**
 * mutt_mb_is_shell_char - Is character not typically part of a pathname
 * @param ch Character to examine
 * @retval true  Character is not typically part of a pathname
 * @retval false Character is typically part of a pathname
 *
 * @note The name is very confusing.
 * @note L'\0' also yields true: wcschr() treats the terminator as part of
 *       the string it searches.
 */
bool mutt_mb_is_shell_char(wchar_t ch)
{
  /* ! not included because it can be part of a pathname in NeoMutt */
  static const wchar_t non_path_chars[] = L"<>&()$?*;{}| ";

  return wcschr(non_path_chars, ch) != NULL;
}
+ Here is the caller graph for this function:

◆ mutt_mb_is_lower()

bool mutt_mb_is_lower ( const char *  s)

Does a multi-byte string contain only lowercase characters?

Parameters
sString to check
Return values
trueString contains no uppercase characters
falseError, or contains some uppercase characters

Non-alphabetic characters are considered lowercase.

Definition at line 353 of file mbyte.c.

354{
355 if (!s)
356 return false;
357
358 wchar_t wc = 0;
359 mbstate_t mbstate = { 0 };
360 size_t l;
361
362 memset(&mbstate, 0, sizeof(mbstate));
363
364 for (; (l = mbrtowc(&wc, s, MB_CUR_MAX, &mbstate)) != 0; s += l)
365 {
366 if (l == ICONV_BUF_TOO_SMALL)
367 continue; /* shift sequences */
368 if (l == ICONV_ILLEGAL_SEQ)
369 return false;
370 if (iswalpha((wint_t) wc) && iswupper((wint_t) wc))
371 return false;
372 }
373
374 return true;
375}
+ Here is the caller graph for this function:

◆ mutt_mb_is_display_corrupting_utf8()

bool mutt_mb_is_display_corrupting_utf8 ( wchar_t  wc)

Will this character corrupt the display?

Parameters
wcCharacter to examine
Return values
trueCharacter would corrupt the display
falseCharacter is safe to display
Note
This list isn't complete.

Definition at line 385 of file mbyte.c.

/**
 * mutt_mb_is_display_corrupting_utf8 - Will this character corrupt the display?
 * @param wc Character to examine
 * @retval true  Character would corrupt the display
 * @retval false Character is safe to display
 *
 * @note This list isn't complete.
 */
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
{
  /* Inclusive code point ranges; a single character has first == last */
  static const struct
  {
    wchar_t first;
    wchar_t last;
  } corrupting[] = {
    { 0x00ad, 0x00ad }, /* soft hyphen */
    { 0x200e, 0x200e }, /* left-to-right mark */
    { 0x200f, 0x200f }, /* right-to-left mark */
    { 0xfeff, 0xfeff }, /* zero width no-break space */
    { 0x2066, 0x2069 }, /* left-to-right isolate, right-to-left isolate,
                         * first strong isolate, pop directional isolate */
    { 0x202a, 0x202e }, /* left-to-right embedding, right-to-left embedding,
                         * pop directional formatting, left-to-right override,
                         * right-to-left override */
  };

  for (size_t idx = 0; idx < (sizeof(corrupting) / sizeof(corrupting[0])); idx++)
  {
    if ((wc >= corrupting[idx].first) && (wc <= corrupting[idx].last))
      return true;
  }

  return false;
}
+ Here is the caller graph for this function:

◆ mutt_mb_filter_unprintable()

int mutt_mb_filter_unprintable ( char **  s)

Replace unprintable characters.

Parameters
[in,out]sString to modify
Return values
0Success
-1Error

Byte sequences that cannot be decoded are replaced with ReplacementChar, and unprintable characters are replaced with '?'.

Note
The source string will be freed and a newly allocated string will be returned in its place. The caller should free the returned string.

Definition at line 419 of file mbyte.c.

420{
421 if (!s || !*s)
422 return -1;
423
424 wchar_t wc = 0;
425 size_t k, k2;
426 char scratch[MB_LEN_MAX + 1];
427 char *p = *s;
428 mbstate_t mbstate1 = { 0 };
429 mbstate_t mbstate2 = { 0 };
430
431 struct Buffer buf = buf_make(0);
432 for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
433 {
434 if ((k == ICONV_ILLEGAL_SEQ) || (k == ICONV_BUF_TOO_SMALL))
435 {
436 k = 1;
437 memset(&mbstate1, 0, sizeof(mbstate1));
438 wc = ReplacementChar;
439 }
440 if (!IsWPrint(wc))
441 wc = '?';
443 continue;
444 k2 = wcrtomb(scratch, wc, &mbstate2);
445 scratch[k2] = '\0';
446 buf_addstr(&buf, scratch);
447 }
448 FREE(s);
449 *s = buf.data ? buf.data : mutt_mem_calloc(1, 1);
450 return 0;
451}
struct Buffer buf_make(size_t size)
Make a new buffer on the stack.
Definition: buffer.c:70
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:238
bool mutt_mb_is_display_corrupting_utf8(wchar_t wc)
Will this character corrupt the display?
Definition: mbyte.c:385
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
Definition: memory.c:50
#define FREE(x)
Definition: memory.h:45
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition: charset.c:63
String manipulation buffer.
Definition: buffer.h:34
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ OptLocales

bool OptLocales

(pseudo) set if user has valid locale definition

Definition at line 43 of file mbyte.c.