00001
00002
00003
00004
00005
00006
00007
00008
00009 static int
00010 utf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
00011 {
00012 unsigned char c = s[0];
00013
00014 if (c < 0x80) {
00015 *pwc = c;
00016 return 1;
00017 } else if (c < 0xc2) {
00018 return RET_ILSEQ;
00019 } else if (c < 0xe0) {
00020 if (n < 2)
00021 return RET_TOOFEW(0);
00022 if (!((s[1] ^ 0x80) < 0x40))
00023 return RET_ILSEQ;
00024 *pwc = ((ucs4_t) (c & 0x1f) << 6)
00025 | (ucs4_t) (s[1] ^ 0x80);
00026 return 2;
00027 } else if (c < 0xf0) {
00028 if (n < 3)
00029 return RET_TOOFEW(0);
00030 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
00031 && (c >= 0xe1 || s[1] >= 0xa0)))
00032 return RET_ILSEQ;
00033 *pwc = ((ucs4_t) (c & 0x0f) << 12)
00034 | ((ucs4_t) (s[1] ^ 0x80) << 6)
00035 | (ucs4_t) (s[2] ^ 0x80);
00036 return 3;
00037 } else if (c < 0xf8) {
00038 if (n < 4)
00039 return RET_TOOFEW(0);
00040 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
00041 && (s[3] ^ 0x80) < 0x40
00042 && (c >= 0xf1 || s[1] >= 0x90)))
00043 return RET_ILSEQ;
00044 *pwc = ((ucs4_t) (c & 0x07) << 18)
00045 | ((ucs4_t) (s[1] ^ 0x80) << 12)
00046 | ((ucs4_t) (s[2] ^ 0x80) << 6)
00047 | (ucs4_t) (s[3] ^ 0x80);
00048 return 4;
00049 } else if (c < 0xfc) {
00050 if (n < 5)
00051 return RET_TOOFEW(0);
00052 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
00053 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
00054 && (c >= 0xf9 || s[1] >= 0x88)))
00055 return RET_ILSEQ;
00056 *pwc = ((ucs4_t) (c & 0x03) << 24)
00057 | ((ucs4_t) (s[1] ^ 0x80) << 18)
00058 | ((ucs4_t) (s[2] ^ 0x80) << 12)
00059 | ((ucs4_t) (s[3] ^ 0x80) << 6)
00060 | (ucs4_t) (s[4] ^ 0x80);
00061 return 5;
00062 } else if (c < 0xfe) {
00063 if (n < 6)
00064 return RET_TOOFEW(0);
00065 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
00066 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
00067 && (s[5] ^ 0x80) < 0x40
00068 && (c >= 0xfd || s[1] >= 0x84)))
00069 return RET_ILSEQ;
00070 *pwc = ((ucs4_t) (c & 0x01) << 30)
00071 | ((ucs4_t) (s[1] ^ 0x80) << 24)
00072 | ((ucs4_t) (s[2] ^ 0x80) << 18)
00073 | ((ucs4_t) (s[3] ^ 0x80) << 12)
00074 | ((ucs4_t) (s[4] ^ 0x80) << 6)
00075 | (ucs4_t) (s[5] ^ 0x80);
00076 return 6;
00077 } else
00078 return RET_ILSEQ;
00079 }
00080
00081 static int
00082 utf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
00083 {
00084 int count;
00085 if (wc < 0x80)
00086 count = 1;
00087 else if (wc < 0x800)
00088 count = 2;
00089 else if (wc < 0x10000)
00090 count = 3;
00091 else if (wc < 0x200000)
00092 count = 4;
00093 else if (wc < 0x4000000)
00094 count = 5;
00095 else if (wc <= 0x7fffffff)
00096 count = 6;
00097 else
00098 return RET_ILSEQ;
00099 if (n < count)
00100 return RET_TOOSMALL;
00101 switch (count) {
00102 case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
00103 case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
00104 case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
00105 case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
00106 case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
00107 case 1: r[0] = wc;
00108 }
00109 return count;
00110 }