krb5 commit: Modernize UTF-8/UCS-2 conversion code
Greg Hudson
ghudson at mit.edu
Mon Apr 17 14:58:31 EDT 2017
https://github.com/krb5/krb5/commit/c4e8d444632140ecb47f31df133c0657f07f9be0
commit c4e8d444632140ecb47f31df133c0657f07f9be0
Author: Robbie Harwood <rharwood at redhat.com>
Date: Thu Apr 6 12:15:39 2017 -0400
Modernize UTF-8/UCS-2 conversion code
Remove unused entry points as we only need to convert between
little-endian UCS-2 byte buffers and UTF-8. Rename and simplify the
remaining two function contracts. Avoid pointer alignment and
endianness issues by operating on byte buffers and using store_16_le()
and load_16_le(). Avoid two-pass operation using k5buf.
[ghudson at mit.edu: simplified code using k5buf; simplified function
names and contracts; rewrote commit message]
src/include/k5-utf8.h | 57 +---
src/lib/crypto/krb/s2k_rc4.c | 2 +-
src/lib/krb5/krb/pac.c | 3 +-
src/lib/krb5/krb/pac_sign.c | 5 +-
src/util/support/libkrb5support-fixed.exports | 5 +-
src/util/support/utf8_conv.c | 411 +++----------------------
6 files changed, 71 insertions(+), 412 deletions(-)
diff --git a/src/include/k5-utf8.h b/src/include/k5-utf8.h
index 22f433c..4b7415e 100644
--- a/src/include/k5-utf8.h
+++ b/src/include/k5-utf8.h
@@ -81,49 +81,22 @@ size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
-int
-krb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s,
- char **utf8s,
- size_t *utf8slen);
-
-int
-krb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s,
- size_t ucs2slen,
- char **utf8s,
- size_t *utf8slen);
-
-int
-krb5int_ucs2les_to_utf8s(const unsigned char *ucs2les,
- char **utf8s,
- size_t *utf8slen);
-
-int
-krb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les,
- size_t ucs2leslen,
- char **utf8s,
- size_t *utf8slen);
-
-int
-krb5int_utf8s_to_ucs2s(const char *utf8s,
- krb5_ucs2 **ucs2s,
- size_t *ucs2chars);
-
-int
-krb5int_utf8cs_to_ucs2s(const char *utf8s,
- size_t utf8slen,
- krb5_ucs2 **ucs2s,
- size_t *ucs2chars);
-
-int
-krb5int_utf8s_to_ucs2les(const char *utf8s,
- unsigned char **ucs2les,
- size_t *ucs2leslen);
-
-int
-krb5int_utf8cs_to_ucs2les(const char *utf8s,
- size_t utf8slen,
- unsigned char **ucs2les,
- size_t *ucs2leslen);
+/*
+ * Convert a little-endian UCS-2 string to an allocated null-terminated UTF-8
+ * string. nbytes is the length of ucs2bytes in bytes, and must be an even
+ * number. Return EINVAL on invalid input, ENOMEM on out of memory, or 0 on
+ * success.
+ */
+int k5_ucs2le_to_utf8(const uint8_t *ucs2bytes, size_t nbytes,
+ char **utf8_out);
+
+/*
+ * Convert a UTF-8 string to an allocated little-endian UCS-2 string. The
+ * resulting length is in bytes and will always be even. Return EINVAL on
+ * invalid input, ENOMEM on out of memory, or 0 on success.
+ */
+int k5_utf8_to_ucs2le(const char *utf8, uint8_t **ucs2_out,
+ size_t *nbytes_out);
/* returns the number of bytes in the UTF-8 string */
size_t krb5int_utf8_bytes(const char *);
diff --git a/src/lib/crypto/krb/s2k_rc4.c b/src/lib/crypto/krb/s2k_rc4.c
index 7286637..fb41b26 100644
--- a/src/lib/crypto/krb/s2k_rc4.c
+++ b/src/lib/crypto/krb/s2k_rc4.c
@@ -24,7 +24,7 @@ krb5int_arcfour_string_to_key(const struct krb5_keytypes *ktp,
utf8 = k5memdup0(string->data, string->length, &err);
if (utf8 == NULL)
return err;
- err = krb5int_utf8s_to_ucs2les(utf8, &copystr, &copystrlen);
+ err = k5_utf8_to_ucs2le(utf8, &copystr, &copystrlen);
free(utf8);
if (err)
return err;
diff --git a/src/lib/krb5/krb/pac.c b/src/lib/krb5/krb/pac.c
index 9098927..6616dd5 100644
--- a/src/lib/krb5/krb/pac.c
+++ b/src/lib/krb5/krb/pac.c
@@ -436,8 +436,7 @@ k5_pac_validate_client(krb5_context context,
pac_princname_length % 2)
return ERANGE;
- ret = krb5int_ucs2lecs_to_utf8s(p, (size_t)pac_princname_length / 2,
- &pac_princname, NULL);
+ ret = k5_ucs2le_to_utf8(p, pac_princname_length, &pac_princname);
if (ret != 0)
return ret;
diff --git a/src/lib/krb5/krb/pac_sign.c b/src/lib/krb5/krb/pac_sign.c
index d40df45..c6eee76 100644
--- a/src/lib/krb5/krb/pac_sign.c
+++ b/src/lib/krb5/krb/pac_sign.c
@@ -54,9 +54,8 @@ k5_insert_client_info(krb5_context context,
if (ret != 0)
goto cleanup;
- ret = krb5int_utf8s_to_ucs2les(princ_name_utf8,
- &princ_name_ucs2,
- &princ_name_ucs2_len);
+ ret = k5_utf8_to_ucs2le(princ_name_utf8, &princ_name_ucs2,
+ &princ_name_ucs2_len);
if (ret != 0)
goto cleanup;
diff --git a/src/util/support/libkrb5support-fixed.exports b/src/util/support/libkrb5support-fixed.exports
index d5d4177..750dc24 100644
--- a/src/util/support/libkrb5support-fixed.exports
+++ b/src/util/support/libkrb5support-fixed.exports
@@ -52,6 +52,8 @@ k5_path_isabs
k5_path_join
k5_path_split
k5_strerror_r
+k5_utf8_to_ucs2le
+k5_ucs2le_to_utf8
krb5int_key_register
krb5int_key_delete
krb5int_getspecific
@@ -77,9 +79,6 @@ krb5int_mutex_free
krb5int_mutex_lock
krb5int_mutex_unlock
krb5int_gmt_mktime
-krb5int_utf8cs_to_ucs2les
-krb5int_utf8s_to_ucs2les
-krb5int_ucs2lecs_to_utf8s
krb5int_ucs4_to_utf8
krb5int_utf8_to_ucs4
krb5int_utf8_lentab
diff --git a/src/util/support/utf8_conv.c b/src/util/support/utf8_conv.c
index 80ca90b..5f279c3 100644
--- a/src/util/support/utf8_conv.c
+++ b/src/util/support/utf8_conv.c
@@ -61,397 +61,86 @@
#include "k5-platform.h"
#include "k5-utf8.h"
+#include "k5-buf.h"
#include "supp-int.h"
static unsigned char mask[] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
-static ssize_t
-k5_utf8s_to_ucs2s(krb5_ucs2 *ucs2str,
- const char *utf8str,
- size_t count,
- int little_endian)
+int
+k5_utf8_to_ucs2le(const char *utf8, uint8_t **ucs2_out, size_t *nbytes_out)
{
- size_t ucs2len = 0;
- size_t utflen, i;
+ struct k5buf buf;
krb5_ucs2 ch;
+ size_t chlen, i;
+ void *p;
- /* If input ptr is NULL or empty... */
- if (utf8str == NULL || *utf8str == '\0') {
- if (ucs2str != NULL)
- *ucs2str = 0;
-
- return 0;
- }
+ *ucs2_out = NULL;
+ *nbytes_out = 0;
- /* Examine next UTF-8 character. */
- while (ucs2len < count && *utf8str != '\0') {
- /* Get UTF-8 sequence length from 1st byte */
- utflen = KRB5_UTF8_CHARLEN2(utf8str, utflen);
+ k5_buf_init_dynamic(&buf);
- if (utflen == 0 || utflen > KRB5_MAX_UTF8_LEN)
- return -1;
+ /* Examine next UTF-8 character. */
+ while (*utf8 != '\0') {
+ /* Get UTF-8 sequence length from first byte. */
+ chlen = KRB5_UTF8_CHARLEN2(utf8, chlen);
+ if (chlen == 0 || chlen > KRB5_MAX_UTF8_LEN)
+ goto invalid;
/* First byte minus length tag */
- ch = (krb5_ucs2)(utf8str[0] & mask[utflen]);
+ ch = (krb5_ucs2)(utf8[0] & mask[chlen]);
- for (i = 1; i < utflen; i++) {
- /* Subsequent bytes must start with 10 */
- if ((utf8str[i] & 0xc0) != 0x80)
- return -1;
+ for (i = 1; i < chlen; i++) {
+ /* Subsequent bytes must start with 10. */
+ if ((utf8[i] & 0xc0) != 0x80)
+ goto invalid;
- ch <<= 6; /* 6 bits of data in each subsequent byte */
- ch |= (krb5_ucs2)(utf8str[i] & 0x3f);
+ /* 6 bits of data in each subsequent byte */
+ ch <<= 6;
+ ch |= (krb5_ucs2)(utf8[i] & 0x3f);
}
- if (ucs2str != NULL) {
-#ifdef K5_BE
-#ifndef SWAP16
-#define SWAP16(X) ((((X) << 8) | ((X) >> 8)) & 0xFFFF)
-#endif
- if (little_endian)
- ucs2str[ucs2len] = SWAP16(ch);
- else
-#endif
- ucs2str[ucs2len] = ch;
- }
+ p = k5_buf_get_space(&buf, 2);
+ if (p == NULL)
+ return ENOMEM;
+ store_16_le(ch, p);
- utf8str += utflen; /* Move to next UTF-8 character */
- ucs2len++; /* Count number of wide chars stored/required */
- }
-
- if (ucs2str != NULL && ucs2len < count) {
- /* Add null terminator if there's room in the buffer. */
- ucs2str[ucs2len] = 0;
- }
-
- return ucs2len;
-}
-
-int
-krb5int_utf8s_to_ucs2s(const char *utf8s,
- krb5_ucs2 **ucs2s,
- size_t *ucs2chars)
-{
- ssize_t len;
- size_t chars;
-
- chars = krb5int_utf8_chars(utf8s);
- *ucs2s = (krb5_ucs2 *)malloc((chars + 1) * sizeof(krb5_ucs2));
- if (*ucs2s == NULL) {
- return ENOMEM;
- }
-
- len = k5_utf8s_to_ucs2s(*ucs2s, utf8s, chars + 1, 0);
- if (len < 0) {
- free(*ucs2s);
- *ucs2s = NULL;
- return EINVAL;
- }
-
- if (ucs2chars != NULL) {
- *ucs2chars = chars;
+ /* Move to next UTF-8 character. */
+ utf8 += chlen;
}
+ *ucs2_out = buf.data;
+ *nbytes_out = buf.len;
return 0;
-}
-
-int
-krb5int_utf8cs_to_ucs2s(const char *utf8s,
- size_t utf8slen,
- krb5_ucs2 **ucs2s,
- size_t *ucs2chars)
-{
- ssize_t len;
- size_t chars;
-
- chars = krb5int_utf8c_chars(utf8s, utf8slen);
- *ucs2s = (krb5_ucs2 *)malloc((chars + 1) * sizeof(krb5_ucs2));
- if (*ucs2s == NULL) {
- return ENOMEM;
- }
-
- len = k5_utf8s_to_ucs2s(*ucs2s, utf8s, chars, 0);
- if (len < 0) {
- free(*ucs2s);
- *ucs2s = NULL;
- return EINVAL;
- }
- (*ucs2s)[chars] = 0;
-
- if (ucs2chars != NULL) {
- *ucs2chars = chars;
- }
- return 0;
+invalid:
+ k5_buf_free(&buf);
+ return EINVAL;
}
int
-krb5int_utf8s_to_ucs2les(const char *utf8s,
- unsigned char **ucs2les,
- size_t *ucs2leslen)
+k5_ucs2le_to_utf8(const uint8_t *ucs2bytes, size_t nbytes, char **utf8_out)
{
- ssize_t len;
- size_t chars;
-
- chars = krb5int_utf8_chars(utf8s);
-
- *ucs2les = (unsigned char *)malloc((chars + 1) * sizeof(krb5_ucs2));
- if (*ucs2les == NULL) {
- return ENOMEM;
- }
-
- len = k5_utf8s_to_ucs2s((krb5_ucs2 *)*ucs2les, utf8s, chars + 1, 1);
- if (len < 0) {
- free(*ucs2les);
- *ucs2les = NULL;
- return EINVAL;
- }
-
- if (ucs2leslen != NULL) {
- *ucs2leslen = chars * sizeof(krb5_ucs2);
- }
-
- return 0;
-}
-
-int
-krb5int_utf8cs_to_ucs2les(const char *utf8s,
- size_t utf8slen,
- unsigned char **ucs2les,
- size_t *ucs2leslen)
-{
- ssize_t len;
- size_t chars;
- krb5_ucs2 *ucs2s;
-
- *ucs2les = NULL;
-
- chars = krb5int_utf8c_chars(utf8s, utf8slen);
- ucs2s = malloc((chars + 1) * sizeof(krb5_ucs2));
- if (ucs2s == NULL)
- return ENOMEM;
-
- len = k5_utf8s_to_ucs2s(ucs2s, utf8s, chars, 1);
- if (len < 0) {
- free(ucs2s);
- return EINVAL;
- }
- ucs2s[chars] = 0;
-
- *ucs2les = (unsigned char *)ucs2s;
- if (ucs2leslen != NULL) {
- *ucs2leslen = chars * sizeof(krb5_ucs2);
- }
-
- return 0;
-}
-
-/*-----------------------------------------------------------------------------
- Convert a wide char string to a UTF-8 string.
- No more than 'count' bytes will be written to the output buffer.
- Return the # of bytes written to the output buffer, excl null terminator.
-
- ucs2len is -1 if the UCS-2 string is NUL terminated, otherwise it is the
- length of the UCS-2 string in characters
-*/
-static ssize_t
-k5_ucs2s_to_utf8s(char *utf8str, const krb5_ucs2 *ucs2str,
- size_t count, ssize_t ucs2len, int little_endian)
-{
- int len = 0;
- int n;
- char *p = utf8str;
- krb5_ucs2 empty = 0, ch;
-
- if (ucs2str == NULL) /* Treat input ptr NULL as an empty string */
- ucs2str = &empty;
-
- if (utf8str == NULL) /* Just compute size of output, excl null */
- {
- while (ucs2len == -1 ? *ucs2str : --ucs2len >= 0) {
- /* Get UTF-8 size of next wide char */
- ch = *ucs2str++;
-#ifdef K5_BE
- if (little_endian)
- ch = SWAP16(ch);
-#endif
-
- n = krb5int_ucs2_to_utf8(ch, NULL);
- if (n < 1 || n > INT_MAX - len)
- return -1;
- len += n;
- }
-
- return len;
- }
-
- /* Do the actual conversion. */
-
- n = 1; /* In case of empty ucs2str */
- while (ucs2len == -1 ? *ucs2str != 0 : --ucs2len >= 0) {
- ch = *ucs2str++;
-#ifdef K5_BE
- if (little_endian)
- ch = SWAP16(ch);
-#endif
-
- n = krb5int_ucs2_to_utf8(ch, p);
-
- if (n < 1)
- break;
-
- p += n;
- count -= n; /* Space left in output buffer */
- }
-
- /* If not enough room for last character, pad remainder with null
- so that return value = original count, indicating buffer full. */
- if (n == 0) {
- while (count--)
- *p++ = 0;
- }
- /* Add a null terminator if there's room. */
- else if (count)
- *p = 0;
-
- if (n == -1) /* Conversion encountered invalid wide char. */
- return -1;
-
- /* Return the number of bytes written to output buffer, excl null. */
- return (p - utf8str);
-}
-
-int
-krb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s,
- char **utf8s,
- size_t *utf8slen)
-{
- ssize_t len;
-
- len = k5_ucs2s_to_utf8s(NULL, ucs2s, 0, -1, 0);
- if (len < 0) {
- return EINVAL;
- }
-
- *utf8s = (char *)malloc((size_t)len + 1);
- if (*utf8s == NULL) {
- return ENOMEM;
- }
-
- len = k5_ucs2s_to_utf8s(*utf8s, ucs2s, (size_t)len + 1, -1, 0);
- if (len < 0) {
- free(*utf8s);
- *utf8s = NULL;
- return EINVAL;
- }
-
- if (utf8slen != NULL) {
- *utf8slen = len;
- }
-
- return 0;
-}
-
-int
-krb5int_ucs2les_to_utf8s(const unsigned char *ucs2les,
- char **utf8s,
- size_t *utf8slen)
-{
- ssize_t len;
-
- len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2les, 0, -1, 1);
- if (len < 0)
- return EINVAL;
-
- *utf8s = (char *)malloc((size_t)len + 1);
- if (*utf8s == NULL) {
- return ENOMEM;
- }
-
- len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2les, (size_t)len + 1, -1, 1);
- if (len < 0) {
- free(*utf8s);
- *utf8s = NULL;
- return EINVAL;
- }
-
- if (utf8slen != NULL) {
- *utf8slen = len;
- }
-
- return 0;
-}
-
-int
-krb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s,
- size_t ucs2slen,
- char **utf8s,
- size_t *utf8slen)
-{
- ssize_t len;
-
- if (ucs2slen > SSIZE_MAX)
- return ERANGE;
-
- len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2s, 0,
- (ssize_t)ucs2slen, 0);
- if (len < 0)
- return EINVAL;
-
- *utf8s = (char *)malloc((size_t)len + 1);
- if (*utf8s == NULL) {
- return ENOMEM;
- }
-
- len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2s, (size_t)len,
- (ssize_t)ucs2slen, 0);
- if (len < 0) {
- free(*utf8s);
- *utf8s = NULL;
- return EINVAL;
- }
- (*utf8s)[len] = '\0';
-
- if (utf8slen != NULL) {
- *utf8slen = len;
- }
-
- return 0;
-}
-
-int
-krb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les,
- size_t ucs2leslen,
- char **utf8s,
- size_t *utf8slen)
-{
- ssize_t len;
+ struct k5buf buf;
+ krb5_ucs2 ch;
+ size_t chlen, i;
+ void *p;
- if (ucs2leslen > SSIZE_MAX)
- return ERANGE;
+ *utf8_out = NULL;
- len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2les, 0,
- (ssize_t)ucs2leslen, 1);
- if (len < 0)
+ if (nbytes % 2 != 0)
return EINVAL;
- *utf8s = (char *)malloc((size_t)len + 1);
- if (*utf8s == NULL) {
- return ENOMEM;
- }
-
- len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2les, (size_t)len,
- (ssize_t)ucs2leslen, 1);
- if (len < 0) {
- free(*utf8s);
- *utf8s = NULL;
- return EINVAL;
- }
- (*utf8s)[len] = '\0';
+ k5_buf_init_dynamic(&buf);
- if (utf8slen != NULL) {
- *utf8slen = len;
+ for (i = 0; i < nbytes; i += 2) {
+ ch = load_16_le(&ucs2bytes[i]);
+ chlen = krb5int_ucs2_to_utf8(ch, NULL);
+ p = k5_buf_get_space(&buf, chlen);
+ if (p == NULL)
+ return ENOMEM;
+ (void)krb5int_ucs2_to_utf8(ch, p);
}
+ *utf8_out = buf.data;
return 0;
}
More information about the cvs-krb5
mailing list