code/utf7_8c_source.html

#include "config.h"

#include <stdbool.h>

#include <string.h>

#include "private.h"

#include "mutt/lib.h"

#include "core/lib.h"


/**
 * Index64u - Reverse lookup for the modified Base64 alphabet
 *
 * Indexed by a 7-bit character code.  Each entry holds the 6-bit value
 * (0-63) of that character, or -1 if the character is not part of the
 * alphabet.  Note that value 63 is ',' (not '/').
 */
static const int Index64u[128] = {
  // clang-format off
  /* 0x00 - 0x1f: control characters */
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 - 0x2f: only '+' (0x2b) and ',' (0x2c) are valid */
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, 62, 63, -1, -1, -1,
  /* 0x30 - 0x3f: '0'..'9' */
  52, 53, 54, 55, 56, 57, 58, 59,
  60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40 - 0x5f: 'A'..'Z' */
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22,
  23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60 - 0x7f: 'a'..'z' */
  -1, 26, 27, 28, 29, 30, 31, 32,
  33, 34, 35, 36, 37, 38, 39, 40,
  41, 42, 43, 44, 45, 46, 47, 48,
  49, 50, 51, -1, -1, -1, -1, -1,
  // clang-format on
};


/**
 * B64Chars - Characters of the modified Base64 alphabet
 *
 * Indexed by a 6-bit value (0-63).  The last two entries are '+' and ','.
 * The array is exactly 64 bytes long and is NOT NUL-terminated.
 */
static const char B64Chars[64] = {
  // clang-format off
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', /*  0 -  7 */
  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', /*  8 - 15 */
  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', /* 16 - 23 */
  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', /* 24 - 31 */
  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 32 - 39 */
  'o', 'p', 'q', 'r', 's', 't', 'u', 'v', /* 40 - 47 */
  'w', 'x', 'y', 'z', '0', '1', '2', '3', /* 48 - 55 */
  '4', '5', '6', '7', '8', '9', '+', ',', /* 56 - 63 */
  // clang-format on
};


/**
 * utf7_to_utf8 - Convert data from RFC2060's UTF-7 to UTF-8
 * @param[in]  u7    UTF-7 data
 * @param[in]  u7len Length of the UTF-7 data, in bytes
 * @param[out] u8    If not NULL, receives the pointer to the new UTF-8 string
 * @param[out] u8len If not NULL, receives the length of the UTF-8 string,
 *                   counting the terminating NUL
 * @retval ptr  Newly allocated, NUL-terminated UTF-8 string (caller frees)
 * @retval NULL The input is not valid modified UTF-7
 *
 * The input is rejected if it contains:
 * - bytes outside printable US-ASCII
 * - a base64 section that encodes printable US-ASCII
 * - a base64 section with non-zero or too many trailing bits
 * - a base64 section that isn't terminated by '-'
 * - two directly adjacent base64 sections
 * - unpaired or misordered UTF-16 surrogates
 *
 * @note On failure, neither `u8` nor `u8len` is written to.
 */
static char *utf7_to_utf8(const char *u7, size_t u7len, char **u8, size_t *u8len)
{
  int b, ch, k;

  char *buf = MUTT_MEM_MALLOC(u7len + u7len / 8 + 1, char);
  char *p = buf;
  int pair1 = 0; /* Pending high surrogate, 0 if there is none */

  for (; u7len; u7++, u7len--)
  {
    if (*u7 == '&')
    {
      u7++;
      u7len--;

      if (u7len && (*u7 == '-'))
      {
        /* "&-" is the escape sequence for a literal ampersand */
        *p++ = '&';
        continue;
      }

      /* Decode one base64 section.
       * ch accumulates the current 16-bit UTF-16 unit;
       * k is the shift for the next 6-bit group (<= 0 means the group
       * completes the current unit and may spill into the next one). */
      ch = 0;
      k = 10;
      for (; u7len; u7++, u7len--)
      {
        if ((*u7 & 0x80) || ((b = Index64u[(int) *u7]) == -1))
          break;
        if (k > 0)
        {
          ch |= b << k;
          k -= 6;
        }
        else
        {
          /* This group completes a 16-bit unit */
          ch |= b >> (-k);
          if (ch < 0x80)
          {
            if ((0x20 <= ch) && (ch < 0x7f))
            {
              /* Printable US-ASCII */
              goto bail;
            }
            *p++ = ch;
          }
          else if (ch < 0x800)
          {
            *p++ = 0xc0 | (ch >> 6);
            *p++ = 0x80 | (ch & 0x3f);
          }
          else
          {
            /* High surrogate pair */
            if ((ch & ~0x3ff) == 0xd800)
            {
              if (pair1)
                goto bail;
              pair1 = ch;
            }
            else
            {
              /* Low surrogate pair */
              if ((ch & ~0x3ff) == 0xdc00)
              {
                if (!pair1)
                  goto bail;

                ch = ((pair1 - 0xd800) << 10) + (ch - 0xdc00) + 0x10000;
                pair1 = 0;
              }
              /* A high surrogate must be followed by a low surrogate */
              if (pair1)
                goto bail;

              if (ch < 0x10000)
              {
                *p++ = 0xe0 | (ch >> 12);
                *p++ = 0x80 | ((ch >> 6) & 0x3f);
                *p++ = 0x80 | (ch & 0x3f);
              }
              else
              {
                *p++ = 0xf0 | (ch >> 18);
                *p++ = 0x80 | ((ch >> 12) & 0x3f);
                *p++ = 0x80 | ((ch >> 6) & 0x3f);
                *p++ = 0x80 | (ch & 0x3f);
              }
            }
          }

          /* Carry the unused low bits of this group into the next unit */
          ch = (b << (16 + k)) & 0xffff;
          k += 10;
        }
      }
      if (ch || (k < 6))
      {
        /* Non-zero or too many extra bits */
        goto bail;
      }
      if (pair1)
      {
        /* The section ended with a high surrogate that was never paired.
         * Without this check it would be silently dropped, or combined with
         * a low surrogate from a later, unrelated base64 section. */
        goto bail;
      }
      if (!u7len || (*u7 != '-'))
      {
        /* BASE64 not properly terminated */
        goto bail;
      }
      if ((u7len > 2) && (u7[1] == '&') && (u7[2] != '-'))
      {
        /* Adjacent BASE64 sections */
        goto bail;
      }
    }
    else if ((*u7 < 0x20) || (*u7 >= 0x7f))
    {
      /* Not printable US-ASCII */
      goto bail;
    }
    else
    {
      *p++ = *u7;
    }
  }
  *p++ = '\0';
  if (u8len)
    *u8len = p - buf;

  /* Shrink the allocation to what was actually used */
  MUTT_MEM_REALLOC(&buf, p - buf, char);
  if (u8)
    *u8 = buf;
  return buf;

bail:
  FREE(&buf);
  return NULL;
}


/**
 * utf8_to_utf7 - Convert data from UTF-8 to RFC2060's UTF-7
 * @param[in]  u8    UTF-8 data
 * @param[in]  u8len Length of the UTF-8 data, in bytes
 * @param[out] u7    If not NULL, receives the pointer to the new UTF-7 string
 * @param[out] u7len If not NULL, receives the length of the UTF-7 string,
 *                   counting the terminating NUL
 * @retval ptr  Newly allocated, NUL-terminated UTF-7 string (caller frees)
 * @retval NULL The input is not valid UTF-8
 *
 * Printable US-ASCII is copied through ('&' becomes "&-"); everything else is
 * written as UTF-16 in a modified-Base64 section delimited by '&' and '-'.
 *
 * The input is rejected if it contains invalid lead or continuation bytes,
 * truncated or overlong sequences, code points above U+10FFFF, or
 * UTF-8-encoded surrogates (U+D800..U+DFFF).
 *
 * @note On failure, neither `u7` nor `u7len` is written to.
 */
static char *utf8_to_utf7(const char *u8, size_t u8len, char **u7, size_t *u7len)
{
  int ch;
  int n, b = 0, k = 0;
  bool base64 = false;

  /* In the worst case we convert 2 chars to 7 chars. For example:
   * "\x10&\x10&..." -> "&ABA-&-&ABA-&-...".  */
  char *buf = MUTT_MEM_MALLOC((u8len / 2) * 7 + 6, char);
  char *p = buf;

  while (u8len)
  {
    unsigned char c = *u8;

    /* Classify the lead byte: ch gets its payload bits,
     * n is the number of continuation bytes that must follow */
    if (c < 0x80)
    {
      ch = c;
      n = 0;
    }
    else if (c < 0xc2)
    {
      /* Stray continuation byte, or overlong 2-byte lead (0xc0, 0xc1) */
      goto bail;
    }
    else if (c < 0xe0)
    {
      ch = c & 0x1f;
      n = 1;
    }
    else if (c < 0xf0)
    {
      ch = c & 0x0f;
      n = 2;
    }
    else if (c < 0xf8)
    {
      ch = c & 0x07;
      n = 3;
    }
    else
    {
      /* 5- and 6-byte forms can only encode values above U+10FFFF */
      goto bail;
    }

    u8++;
    u8len--;
    if ((size_t) n > u8len)
      goto bail;
    for (int i = 0; i < n; i++)
    {
      if ((u8[i] & 0xc0) != 0x80)
        goto bail;
      ch = (ch << 6) | (u8[i] & 0x3f);
    }
    /* Overlong encoding: the value would have fitted in a shorter sequence */
    if ((n > 1) && !(ch >> (n * 5 + 1)))
      goto bail;
    /* Not a Unicode scalar value.  Values above U+10FFFF can't be expressed
     * as a surrogate pair: they would overflow the 16-bit high surrogate and
     * index past the end of B64Chars.  Surrogates aren't valid in UTF-8. */
    if ((ch > 0x10ffff) || ((ch >= 0xd800) && (ch <= 0xdfff)))
      goto bail;
    u8 += n;
    u8len -= n;

    if ((ch < 0x20) || (ch >= 0x7f))
    {
      if (!base64)
      {
        /* Open a new base64 section */
        *p++ = '&';
        base64 = true;
        b = 0;
        k = 10;
      }

      // For code points >= 0x10000 we need to use a UTF-16 surrogate pair
      if (ch & ~0xffff)
      {
        ch -= 0x10000;
        int pair1 = 0xd800 + (ch >> 10);
        int pair2 = 0xdc00 + (ch & 0x3ff);

        /* Output the high surrogate */
        *p++ = B64Chars[b | pair1 >> k];
        k -= 6;
        for (; k >= 0; k -= 6)
          *p++ = B64Chars[(pair1 >> k) & 0x3f];
        b = (pair1 << (-k)) & 0x3f;
        k += 16;

        /* The low surrogate will be output just below */
        ch = pair2;
      }

      /* Emit one 16-bit unit: b holds leftover bits from the previous unit,
       * k is the shift that aligns the next 6-bit group */
      *p++ = B64Chars[b | ch >> k];
      k -= 6;
      for (; k >= 0; k -= 6)
        *p++ = B64Chars[(ch >> k) & 0x3f];
      b = (ch << (-k)) & 0x3f;
      k += 16;
    }
    else
    {
      if (base64)
      {
        /* Close the base64 section, flushing any leftover bits */
        if (k > 10)
          *p++ = B64Chars[b];
        *p++ = '-';
        base64 = false;
      }
      *p++ = ch;
      if (ch == '&')
        *p++ = '-';
    }
  }

  if (base64)
  {
    /* The input ended inside a base64 section */
    if (k > 10)
      *p++ = B64Chars[b];
    *p++ = '-';
  }

  *p++ = '\0';
  if (u7len)
    *u7len = p - buf;
  /* Shrink the allocation to what was actually used */
  MUTT_MEM_REALLOC(&buf, p - buf, char);
  if (u7)
    *u7 = buf;
  return buf;

bail:
  FREE(&buf);
  return NULL;
}


/**
 * imap_utf_encode - Encode a string from the local charset for the server
 * @param[in]     unicode true: stop at UTF-8; false: go on to encode as UTF-7
 * @param[in,out] s       String to encode, replaced in place
 *
 * The string is converted from $charset to UTF-8 and then, unless `unicode`
 * is set, from UTF-8 to UTF-7.
 *
 * Nothing happens if `s` or `*s` is NULL, if $charset is unset, or if
 * `unicode` is set and $charset is already UTF-8.
 *
 * If the charset conversion fails, `*s` is released with FREE().
 * If the UTF-7 encoding fails, `*s` ends up NULL.
 */
void imap_utf_encode(bool unicode, char **s)
{
  if (!s || !*s)
    return;

  const char *local_cs = cc_charset();
  if (!local_cs)
    return;

  /* Already in the encoding the server wants */
  if (unicode && mutt_ch_is_utf8(local_cs))
    return;

  const int rc = mutt_ch_convert_string(s, local_cs, "utf-8", MUTT_ICONV_NO_FLAGS);
  if (rc != 0)
  {
    FREE(s);
    return;
  }

  if (unicode)
    return;

  /* Swap the UTF-8 string for its UTF-7 form (NULL if it can't be encoded) */
  char *encoded = utf8_to_utf7(*s, strlen(*s), NULL, NULL);
  FREE(s);
  *s = encoded;
}


/**
 * imap_utf_decode - Decode a string from the server into the local charset
 * @param[in]     unicode true: input is UTF-8; false: input is UTF-7
 * @param[in,out] s       String to decode, replaced in place
 *
 * Unless `unicode` is set, the string is first decoded from UTF-7 to UTF-8.
 * The result is then converted from UTF-8 to $charset.
 *
 * Nothing happens if `s` or `*s` is NULL, if $charset is unset, or if
 * `unicode` is set and $charset is already UTF-8.
 *
 * If the UTF-7 decoding fails, `*s` is NULL by the time it is handed to
 * mutt_ch_convert_string().  If the charset conversion fails, `*s` is
 * released with FREE().
 */
void imap_utf_decode(bool unicode, char **s)
{
  if (!s || !*s)
    return;

  const char *local_cs = cc_charset();
  if (!local_cs)
    return;

  /* Already in the local encoding */
  if (unicode && mutt_ch_is_utf8(local_cs))
    return;

  if (!unicode)
  {
    /* Swap the UTF-7 string for its UTF-8 form (NULL if it can't be decoded) */
    char *decoded = utf7_to_utf8(*s, strlen(*s), NULL, NULL);
    FREE(s);
    *s = decoded;
  }

  const int rc = mutt_ch_convert_string(s, "utf-8", local_cs, MUTT_ICONV_NO_FLAGS);
  if (rc != 0)
    FREE(s);
}

cc_charset
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:116

lib.h
Convenience wrapper for the core headers.

FREE
#define FREE(x)
Definition: memory.h:55

MUTT_MEM_REALLOC
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition: memory.h:43

MUTT_MEM_MALLOC
#define MUTT_MEM_MALLOC(n, type)
Definition: memory.h:41

mutt_ch_convert_string
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:831

mutt_ch_is_utf8
#define mutt_ch_is_utf8(str)
Definition: charset.h:89

MUTT_ICONV_NO_FLAGS
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:64

lib.h
Convenience wrapper for the library headers.

private.h
GUI display the mailboxes in a side panel.

imap_utf_encode
void imap_utf_encode(bool unicode, char **s)
Encode a string from the local charset to UTF-8, then to UTF-7 unless unicode is set.
Definition: utf7.c:397

utf8_to_utf7
static char * utf8_to_utf7(const char *u8, size_t u8len, char **u7, size_t *u7len)
Convert data from UTF-8 to RFC2060's UTF-7.
Definition: utf7.c:252

imap_utf_decode
void imap_utf_decode(bool unicode, char **s)
Decode a string from UTF-7 (or from UTF-8 if unicode is set) to the local charset.
Definition: utf7.c:430

B64Chars
static const char B64Chars[64]
Characters of the Base64 encoding.
Definition: utf7.c:82

utf7_to_utf8
static char * utf7_to_utf8(const char *u7, size_t u7len, char **u8, size_t *u8len)
Convert data from RFC2060's UTF-7 to UTF-8.
Definition: utf7.c:106

Index64u
static const int Index64u[128]
Lookup table for Base64 encoding/decoding.
Definition: utf7.c:66