svn rev #22350: trunk/src/ include/ lib/krb5/unicode/ util/support/

ghudson@MIT.EDU ghudson at MIT.EDU
Thu May 14 12:16:33 EDT 2009


http://src.mit.edu/fisheye/changelog/krb5/?cs=22350
Commit By: ghudson
Log Message:
ticket: 6489
subject: UCS2 support doesn't handle upper half of BMP
tags: pullup
target_version: 1.7

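Make krb5_ucs2 an unsigned type so that code points in the upper half
of the BMP (0x8000 through 0xFFFF) can be represented.  Eliminate the
need for distinguished "invalid" values for ucs2 and ucs4 characters
(KRB5_UCS2_INVALID and KRB5_UCS4_INVALID) by changing the API of the
single-character conversion routines: krb5int_utf8_to_ucs2() and
krb5int_utf8_to_ucs4() now return 0 on success or -1 on failure and
store the converted character through an output pointer.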



Changed Files:
U   trunk/src/include/k5-utf8.h
U   trunk/src/lib/krb5/unicode/ucstr.c
U   trunk/src/util/support/utf8.c
Modified: trunk/src/include/k5-utf8.h
===================================================================
--- trunk/src/include/k5-utf8.h	2009-05-14 01:18:43 UTC (rev 22349)
+++ trunk/src/include/k5-utf8.h	2009-05-14 16:16:32 UTC (rev 22350)
@@ -84,9 +84,9 @@
 #endif
 
 #if INT_MAX == 0x7fff
-typedef	int	krb5_ucs2;
+typedef	unsigned int	krb5_ucs2;
 #elif SHRT_MAX == 0x7fff
-typedef	short	krb5_ucs2;
+typedef	unsigned short	krb5_ucs2;
 #else
 #error undefined 16 bit type
 #endif
@@ -101,15 +101,12 @@
 #error: undefined 32 bit type
 #endif
 
-#define KRB5_UCS2_INVALID   ((krb5_ucs2)0x8000)
-#define KRB5_UCS4_INVALID   ((krb5_ucs4)0x80000000)
-
 #define KRB5_MAX_UTF8_LEN   (sizeof(krb5_ucs2) * 3/2)
 
-krb5_ucs2 krb5int_utf8_to_ucs2(const char *p);
+int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
 size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
 
-krb5_ucs4 krb5int_utf8_to_ucs4(const char *p);
+int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
 size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
 
 int

Modified: trunk/src/lib/krb5/unicode/ucstr.c
===================================================================
--- trunk/src/lib/krb5/unicode/ucstr.c	2009-05-14 01:18:43 UTC (rev 22349)
+++ trunk/src/lib/krb5/unicode/ucstr.c	2009-05-14 16:16:32 UTC (rev 22350)
@@ -397,8 +397,7 @@
 
     /* convert and normalize 1st string */
     for (i = 0, ulen = 0; i < l1; i += len, ulen++) {
-	ucs[ulen] = krb5int_utf8_to_ucs4(s1 + i);
-	if (ucs[ulen] == KRB5_UCS4_INVALID) {
+	if (krb5int_utf8_to_ucs4(s1 + i, &ucs[ulen]) == -1) {
 	    free(ucs);
 	    return -1;		/* what to do??? */
 	}
@@ -420,8 +419,7 @@
 
     /* convert and normalize 2nd string */
     for (i = 0, ulen = 0; i < l2; i += len, ulen++) {
-	ucs[ulen] = krb5int_utf8_to_ucs4(s2 + i);
-	if (ucs[ulen] == KRB5_UCS4_INVALID) {
+	if (krb5int_utf8_to_ucs4(s2 + i, &ucs[ulen]) == -1) {
 	    free(ucsout1);
 	    free(ucs);
 	    return 1;		/* what to do??? */

Modified: trunk/src/util/support/utf8.c
===================================================================
--- trunk/src/util/support/utf8.c	2009-05-14 01:18:43 UTC (rev 22349)
+++ trunk/src/util/support/utf8.c	2009-05-14 16:16:32 UTC (rev 22350)
@@ -159,7 +159,11 @@
     return i;
 }
 
-krb5_ucs4 krb5int_utf8_to_ucs4(const char *p)
+/*
+ * Convert a UTF8 character to a UCS4 character.  Return 0 on success,
+ * -1 on failure.
+ */
+int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out)
 {
     const unsigned char *c = (const unsigned char *) p;
     krb5_ucs4 ch;
@@ -167,33 +171,35 @@
     static unsigned char mask[] = {
 	0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
 
+    *out = 0;
     len = KRB5_UTF8_CHARLEN2(p, len);
 
     if (len == 0)
-	return KRB5_UCS4_INVALID;
+	return -1;
 
     ch = c[0] & mask[len];
 
     for (i = 1; i < len; i++) {
-	if ((c[i] & 0xc0) != 0x80) {
-	    return KRB5_UCS4_INVALID;
-	}
+	if ((c[i] & 0xc0) != 0x80)
+	    return -1;
 
 	ch <<= 6;
 	ch |= c[i] & 0x3f;
     }
 
-    return ch;
+    *out = ch;
+    return 0;
 }
 
-krb5_ucs2 krb5int_utf8_to_ucs2(const char *p)
+int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out)
 {
-    krb5_ucs4 ch = krb5int_utf8_to_ucs4(p);
+    krb5_ucs4 ch;
 
-    if (ch == KRB5_UCS4_INVALID || ch > SHRT_MAX)
-	return KRB5_UCS2_INVALID;
-
-    return (krb5_ucs2)ch;
+    *out = 0;
+    if (krb5int_utf8_to_ucs4(p, &ch) == -1 || ch > 0xFFFF)
+	return -1;
+    *out = (krb5_ucs2) ch;
+    return 0;
 }
 
 /* conv UCS-2 to UTF-8, not used */
@@ -446,10 +452,13 @@
 /* like strchr() */
 char *krb5int_utf8_strchr(const char *str, const char *chr)
 {
+    krb5_ucs4 chs, ch;
+
+    if (krb5int_utf8_to_ucs4(chr, &ch) == -1)
+	return NULL;
     for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
-	if (krb5int_utf8_to_ucs4(str) == krb5int_utf8_to_ucs4(chr)) {
+	if (krb5int_utf8_to_ucs4(str, &chs) == 0 && chs == ch)
 	    return (char *)str;
-	} 
     }
 
     return NULL;
@@ -458,14 +467,14 @@
 /* like strcspn() but returns number of bytes, not characters */
 size_t krb5int_utf8_strcspn(const char *str, const char *set)
 {
-    const char *cstr;
-    const char *cset;
+    const char *cstr, *cset;
+    krb5_ucs4 chstr, chset;
 
     for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
 	for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
-	    if (krb5int_utf8_to_ucs4(cstr) == krb5int_utf8_to_ucs4(cset)) {
+	    if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
+		&& krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
 		return cstr - str;
-	    } 
 	}
     }
 
@@ -475,18 +484,16 @@
 /* like strspn() but returns number of bytes, not characters */
 size_t krb5int_utf8_strspn(const char *str, const char *set)
 {
-    const char *cstr;
-    const char *cset;
+    const char *cstr, *cset;
+    krb5_ucs4 chstr, chset;
 
     for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
 	for (cset = set; ; KRB5_UTF8_INCR(cset)) {
-	    if (*cset == '\0') {
+	    if (*cset == '\0')
 		return cstr - str;
-	    }
-
-	    if (krb5int_utf8_to_ucs4(cstr) == krb5int_utf8_to_ucs4(cset)) {
+	    if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
+		&& krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
 		break;
-	    } 
 	}
     }
 
@@ -496,13 +503,14 @@
 /* like strpbrk(), replaces strchr() as well */
 char *krb5int_utf8_strpbrk(const char *str, const char *set)
 {
+    const char *cset;
+    krb5_ucs4 chstr, chset;
+
     for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
-	const char *cset;
-
 	for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
-	   if (krb5int_utf8_to_ucs4(str) == krb5int_utf8_to_ucs4(cset)) {
+	    if (krb5int_utf8_to_ucs4(str, &chstr) == 0
+		&& krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
 		return (char *)str;
-	    } 
 	}
     }
 




More information about the cvs-krb5 mailing list