Fix bugs in UTF-8 code. Apply to getdn and charray.

This commit is contained in:
Kurt Zeilenga 2000-01-23 23:07:24 +00:00
parent fdcd8465c3
commit 42cc5e5333
5 changed files with 161 additions and 97 deletions

View file

@ -77,8 +77,8 @@ ldap_charray_dup LDAP_P(( char **a ));
LIBLDAP_F( char ** )
ldap_str2charray LDAP_P((
char *str,
char *brkstr ));
const char *str,
const char *brkstr ));
/* url.c */
LIBLDAP_F (void) ldap_pvt_hex_unescape LDAP_P(( char *s ));
@ -152,7 +152,9 @@ LIBLDAP_F (int) ldap_pvt_tls_start LDAP_P(( Sockbuf *sb, void *ctx_arg ));
LIBLDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
/* returns the number of UTF-8 characters in the string */
LIBLDAP_F (ber_len_t) ldap_utf8_chars( const char * );
/* returns the length (in bytes) of a UTF-8 string */
/* returns the length (in bytes) of the UTF-8 character */
LIBLDAP_F (int) ldap_utf8_offset( const char * );
/* returns the length (in bytes) indicated by the UTF-8 character */
LIBLDAP_F (int) ldap_utf8_charlen( const char * );
/* copies a UTF-8 character and returns the number of bytes copied */
LIBLDAP_F (int) ldap_utf8_copy( char *, const char *);
@ -174,6 +176,8 @@ LIBLDAP_F (int) ldap_utf8_isspace( const char * );
LIBLDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
/* span characters in set, return bytes spanned */
LIBLDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
/* return first occurrence of character in string */
LIBLDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
/* return first character of set in string */
LIBLDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
/* reentrant tokenizer */
@ -183,14 +187,16 @@ LIBLDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);
/*
 * True when the byte at p has its high bit clear, i.e. it is a complete
 * single-byte (7-bit ASCII) UTF-8 character.  Lead and continuation bytes
 * of multi-byte sequences all have bit 0x80 set, so they yield false.
 * (Comparing an unsigned char against 0x100 would always be true.)
 */
#define LDAP_UTF8_ISASCII(p) ( !( * (const unsigned char *) (p) & 0x80 ) )
#define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
? 1 : ldap_utf8_charlen((p)) )
#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \
? 1 : ldap_utf8_offset((p)) )
#define LDAP_UTF8_COPY(p) ( LDAP_UTF8_ISASCII(p) \
? (*(d) = *(s), 1) : ldap_utf8_cpy((d),(s)) )
#define LDAP_UTF8_COPY(d,s) ( LDAP_UTF8_ISASCII(s) \
? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) )
#define LDAP_UTF8_NEXT(p) ( LDAP_UTF8_ISASCII(p) \
? &(p)[1] : ldap_utf8_next((p)) )
? (char *)(p)+1 : ldap_utf8_next((p)) )
#define LDAP_UTF8_INCR(p) ( (p) = LDAP_UTF8_NEXT(p) )
#define LDAP_UTF8_INCR(p) ((p) = LDAP_UTF8_NEXT(p))
/* For symmetry */
#define LDAP_UTF8_PREV(p) (ldap_utf8_prev((p)))

View file

@ -165,22 +165,22 @@ ldap_charray_dup( char **a )
}
char **
ldap_str2charray( char *str, char *brkstr )
ldap_str2charray( const char *str_in, const char *brkstr )
{
char **res;
char *s;
char *str, *s;
char *lasts;
int i;
/* protect the input string from strtok */
str = LDAP_STRDUP( str );
str = LDAP_STRDUP( str_in );
if( str == NULL ) {
return NULL;
}
i = 1;
for ( s = str; *s; s++ ) {
if ( strchr( brkstr, *s ) != NULL ) {
if ( ldap_utf8_strchr( brkstr, s ) != NULL ) {
i++;
}
}
@ -194,9 +194,9 @@ ldap_str2charray( char *str, char *brkstr )
i = 0;
for ( s = ldap_pvt_strtok( str, brkstr, &lasts );
for ( s = ldap_utf8_strtok( str, brkstr, &lasts );
s != NULL;
s = ldap_pvt_strtok( NULL, brkstr, &lasts ) )
s = ldap_utf8_strtok( NULL, brkstr, &lasts ) )
{
res[i] = LDAP_STRDUP( s );

View file

@ -23,11 +23,11 @@
#include "ldap-int.h"
#define DN_TYPE_LDAP_RDN 0
#define DN_TYPE_LDAP_DN 1
#define DN_TYPE_DCE_DN 2
#define NAME_TYPE_LDAP_RDN 0
#define NAME_TYPE_LDAP_DN 1
#define NAME_TYPE_DCE_DN 2
static char **explode_name( const char *name, int notypes, int is_dn );
static char **explode_name( const char *name, int notypes, int is_type );
char *
ldap_get_dn( LDAP *ld, LDAPMessage *entry )
@ -64,27 +64,28 @@ ldap_dn2ufn( LDAP_CONST char *dn )
}
if ( ldap_is_dns_dn( dn ) ||
( p = strchr( dn, '=' ) ) == NULL )
( p = ldap_utf8_strpbrk( dn, "=" ) ) == NULL )
{
return( LDAP_STRDUP( dn ) );
}
ufn = LDAP_STRDUP( ++p );
if( ufn == NULL ) return NULL;
#define INQUOTE 1
#define OUTQUOTE 2
state = OUTQUOTE;
for ( p = ufn, r = ufn; *p; p++ ) {
for ( p = ufn, r = ufn; *p; LDAP_UTF8_INCR(p) ) {
switch ( *p ) {
case '\\':
if ( *++p == '\0' )
p--;
else {
if ( p[1] != '\0' ) {
*r++ = '\\';
*r++ = *p;
LDAP_UTF8_COPY(r,++p);
LDAP_UTF8_INCR(r);
}
break;
case '"':
if ( state == INQUOTE )
state = OUTQUOTE;
@ -92,6 +93,7 @@ ldap_dn2ufn( LDAP_CONST char *dn )
state = INQUOTE;
*r++ = *p;
break;
case ';':
case ',':
if ( state == OUTQUOTE )
@ -99,17 +101,22 @@ ldap_dn2ufn( LDAP_CONST char *dn )
else
*r++ = *p;
break;
case '=':
if ( state == INQUOTE )
if ( state == INQUOTE ) {
*r++ = *p;
else {
} else {
char *rsave = r;
*r-- = '\0';
while ( !isspace( (unsigned char) *r )
*r = '\0';
LDAP_UTF8_DECR( r );
while ( !ldap_utf8_isspace( r )
&& *r != ';' && *r != ',' && r > ufn )
r--;
r++;
{
LDAP_UTF8_DECR( r );
}
LDAP_UTF8_INCR( r );
if ( strcasecmp( r, "c" )
&& strcasecmp( r, "o" )
@ -122,8 +129,10 @@ ldap_dn2ufn( LDAP_CONST char *dn )
}
}
break;
default:
*r++ = *p;
LDAP_UTF8_COPY(r, p);
LDAP_UTF8_INCR(r);
break;
}
}
@ -184,14 +193,14 @@ ldap_explode_dn( LDAP_CONST char *dn, int notypes )
if ( ldap_is_dns_dn( dn ) ) {
return( ldap_explode_dns( dn ) );
}
return explode_name( dn, notypes, DN_TYPE_LDAP_DN );
return explode_name( dn, notypes, NAME_TYPE_LDAP_DN );
}
char **
ldap_explode_rdn( LDAP_CONST char *rdn, int notypes )
{
Debug( LDAP_DEBUG_TRACE, "ldap_explode_rdn\n", 0, 0, 0 );
return explode_name( rdn, notypes, DN_TYPE_LDAP_RDN );
return explode_name( rdn, notypes, NAME_TYPE_LDAP_RDN );
}
char *
@ -202,7 +211,7 @@ ldap_dn2dcedn( LDAP_CONST char *dn )
Debug( LDAP_DEBUG_TRACE, "ldap_dn2dcedn\n", 0, 0, 0 );
rdns = explode_name( dn, 0, DN_TYPE_LDAP_DN );
rdns = explode_name( dn, 0, NAME_TYPE_LDAP_DN );
if ( rdns == NULL ) {
return NULL;
}
@ -240,7 +249,7 @@ ldap_dcedn2dn( LDAP_CONST char *dce )
Debug( LDAP_DEBUG_TRACE, "ldap_dcedn2dn\n", 0, 0, 0 );
rdns = explode_name( dce, 0, DN_TYPE_DCE_DN );
rdns = explode_name( dce, 0, NAME_TYPE_DCE_DN );
if ( rdns == NULL ) {
return NULL;
}
@ -279,22 +288,35 @@ ldap_dcedn2dn( LDAP_CONST char *dce )
}
static char **
explode_name( const char *name, int notypes, int is_dn )
explode_name( const char *name, int notypes, int is_type )
{
const char *p, *q;
const char *p, *q, *rdn;
char **parts = NULL;
int state, count = 0, endquote, len;
int offset, state, have_equals, count = 0, endquote, len;
p = name-1;
/* safe guard */
if(name == NULL) name = "";
/* skip leading whitespace */
while( ldap_utf8_isspace( name )) {
LDAP_UTF8_INCR( name );
}
p = rdn = name;
offset = 0;
state = OUTQUOTE;
have_equals=0;
do {
/* step forward */
p += offset;
offset = 1;
++p;
switch ( *p ) {
case '\\':
if ( *++p == '\0' )
p--;
if ( p[1] != '\0' ) {
offset = LDAP_UTF8_OFFSET(++p);
}
break;
case '"':
if ( state == INQUOTE )
@ -302,23 +324,28 @@ explode_name( const char *name, int notypes, int is_dn )
else
state = INQUOTE;
break;
case '=':
if( state = OUTQUOTE ) have_equals++;
break;
case '+':
if (is_dn == DN_TYPE_LDAP_RDN)
if (is_type == NAME_TYPE_LDAP_RDN)
goto end_part;
break;
case '/':
if (is_dn == DN_TYPE_DCE_DN)
if (is_type == NAME_TYPE_DCE_DN)
goto end_part;
break;
case ';':
case ',':
if (is_dn == DN_TYPE_LDAP_DN)
if (is_type == NAME_TYPE_LDAP_DN)
goto end_part;
break;
case '\0':
end_part:
if ( state == OUTQUOTE ) {
++count;
have_equals=0;
if ( parts == NULL ) {
if (( parts = (char **)LDAP_MALLOC( 8
* sizeof( char *))) == NULL )
@ -329,31 +356,45 @@ explode_name( const char *name, int notypes, int is_dn )
== NULL )
return( NULL );
}
parts[ count ] = NULL;
endquote = 0;
if ( notypes ) {
for ( q = name;
q < p && *q != '='; ++q ) {
;
for ( q = rdn; q < p && *q != '='; ++q ) {
/* EMPTY */;
}
if ( q < p ) {
name = ++q;
rdn = ++q;
}
if ( *name == '"' ) {
++name;
if ( *rdn == '"' ) {
++rdn;
}
if ( *(p-1) == '"' ) {
if ( p[-1] == '"' ) {
endquote = 1;
--p;
}
}
len = p - name;
len = p - rdn;
if (( parts[ count-1 ] = (char *)LDAP_CALLOC( 1,
len + 1 )) != NULL ) {
SAFEMEMCPY( parts[ count-1 ], name,
len );
len + 1 )) != NULL )
{
SAFEMEMCPY( parts[ count-1 ], rdn, len );
if( !endquote ) {
/* skip trailing spaces */
while( len > 0 && ldap_utf8_isspace(
&parts[count-1][len-1] ) )
{
--len;
}
}
parts[ count-1 ][ len ] = '\0';
}
@ -365,11 +406,10 @@ explode_name( const char *name, int notypes, int is_dn )
if ( endquote == 1 )
p++;
name = *p ? p + 1 : p;
while ( isascii( *name ) && isspace( *name ) )
++name;
}
break;
rdn = *p ? &p[1] : p;
while ( ldap_utf8_isspace( rdn ) )
++rdn;
} break;
}
} while ( *p );
@ -380,9 +420,6 @@ explode_name( const char *name, int notypes, int is_dn )
int
ldap_is_dns_dn( LDAP_CONST char *dn )
{
return( dn[ 0 ] != '\0'
&& strchr( dn, '=' ) == NULL
&& strchr( dn, ',' ) == NULL
&& strchr( dn, ';' ) == NULL );
return dn[ 0 ] != '\0' && ldap_utf8_strpbrk( dn, "=,;" ) == NULL;
}

View file

@ -4,6 +4,11 @@
* COPYING RESTRICTIONS APPLY, see COPYRIGHT file
*/
/*
* Locale-specific 1-byte character versions
* See utf-8.c for UTF-8 versions
*/
#include "portable.h"
#include <ac/stdlib.h>

View file

@ -60,13 +60,19 @@ ber_len_t ldap_utf8_chars( const char * p )
/* could be optimized and could check for invalid sequences */
ber_len_t chars=0;
for( ; *p ; p=LDAP_UTF8_NEXT(p) ) {
for( ; *p ; LDAP_UTF8_INCR(p) ) {
chars++;
};
return chars;
}
/*
 * Byte distance from p to the start of the character that follows it,
 * as located by LDAP_UTF8_NEXT.
 */
int ldap_utf8_offset( const char * p )
{
	const char *following = LDAP_UTF8_NEXT(p);

	return following - p;
}
/*
* Returns length indicated by first byte.
*
@ -111,7 +117,7 @@ ber_int_t ldap_utf8_to_ucs4( const char * p )
ber_int_t ch;
int len, i;
static unsigned char mask[] = {
0, 0x7f, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
len = LDAP_UTF8_CHARLEN(p);
@ -146,33 +152,33 @@ int ldap_ucs4_to_utf8( ber_int_t c, char *buf )
} else if( c < 0x800 ) {
p[len++] = 0xc0 | ( c >> 6 );
p[len++] = 0x80 | ( c & 0x3F );
p[len++] = 0x80 | ( c & 0x3f );
} else if( c < 0x10000 ) {
p[len++] = 0xe0 | ( c >> 12 );
p[len++] = 0x80 | ( (c >> 6) & 0x3F );
p[len++] = 0x80 | ( c & 0x3F );
p[len++] = 0x80 | ( (c >> 6) & 0x3f );
p[len++] = 0x80 | ( c & 0x3f );
} else if( c < 0x200000 ) {
p[len++] = 0xf0 | ( c >> 18 );
p[len++] = 0x80 | ( (c >> 12) & 0x3F );
p[len++] = 0x80 | ( (c >> 6) & 0x3F );
p[len++] = 0x80 | ( c & 0x3F );
p[len++] = 0x80 | ( (c >> 12) & 0x3f );
p[len++] = 0x80 | ( (c >> 6) & 0x3f );
p[len++] = 0x80 | ( c & 0x3f );
} else if( c < 0x400000 ) {
p[len++] = 0xf8 | ( c >> 24 );
p[len++] = 0x80 | ( (c >> 18) & 0x3F );
p[len++] = 0x80 | ( (c >> 12) & 0x3F );
p[len++] = 0x80 | ( (c >> 6) & 0x3F );
p[len++] = 0x80 | ( c & 0x3F );
p[len++] = 0x80 | ( (c >> 18) & 0x3f );
p[len++] = 0x80 | ( (c >> 12) & 0x3f );
p[len++] = 0x80 | ( (c >> 6) & 0x3f );
p[len++] = 0x80 | ( c & 0x3f );
} else /* if( c < 0x80000000 ) */ {
p[len++] = 0xfc | ( c >> 30 );
p[len++] = 0x80 | ( (c >> 24) & 0x3F );
p[len++] = 0x80 | ( (c >> 18) & 0x3F );
p[len++] = 0x80 | ( (c >> 12) & 0x3F );
p[len++] = 0x80 | ( (c >> 6) & 0x3F );
p[len++] = 0x80 | ( c & 0x3F );
p[len++] = 0x80 | ( (c >> 24) & 0x3f );
p[len++] = 0x80 | ( (c >> 18) & 0x3f );
p[len++] = 0x80 | ( (c >> 12) & 0x3f );
p[len++] = 0x80 | ( (c >> 6) & 0x3f );
p[len++] = 0x80 | ( c & 0x3f );
}
buf[len] = '\0';
@ -198,7 +204,7 @@ char* ldap_utf8_next( const char * p )
}
for( i=1; i<6; i++ ) {
if ( u[i] & 0xC0 != 0x80 ) {
if ( u[i] & 0xc0 != 0x80 ) {
return (char *) &p[i];
}
}
@ -221,7 +227,7 @@ char* ldap_utf8_prev( const char * p )
const unsigned char *u = p;
for( i=-1; i>-6 ; i-- ) {
if ( u[i] & 0xC0 != 0x80 ) {
if ( u[i] & 0xc0 != 0x80 ) {
return (char *) &p[i];
}
}
@ -251,7 +257,7 @@ int ldap_utf8_copy( char* dst, const char *src )
}
for( i=1; i<6; i++ ) {
if ( u[i] & 0xC0 != 0x80 ) {
if ( u[i] & 0xc0 != 0x80 ) {
return i;
}
dst[i] = src[i];
@ -340,7 +346,7 @@ int ldap_utf8_islower( const char * p )
{
unsigned c = * (const unsigned char *) p;
if(!UTF8_ISASCII(c)) return 0;
if(!ISASCII(c)) return 0;
return ( c >= 'a' && c <= 'z' );
}
@ -360,15 +366,26 @@ int ldap_utf8_isupper( const char * p )
* UTF-8 string routines
*/
/*
 * like strchr(), but compares whole UTF-8 characters:
 * returns a pointer to the first character in str whose decoded value
 * equals that of the character at chr, or NULL when str is exhausted
 * without a match (the terminating NUL itself is never matched).
 */
char * (ldap_utf8_strchr)( const char *str, const char *chr )
{
	const char *cursor = str;

	while( *cursor != '\0' ) {
		if( ldap_utf8_to_ucs4( cursor ) == ldap_utf8_to_ucs4( chr ) ) {
			return (char *) cursor;
		}
		LDAP_UTF8_INCR(cursor);
	}

	return NULL;
}
/* like strcspn() but returns number of bytes, not characters */
ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set )
{
const char *cstr;
const char *cset;
for( cstr = str; *cstr != '\0'; cstr = LDAP_UTF8_NEXT(cstr) ) {
const char *cset;
for( cset = set; ; cset = LDAP_UTF8_NEXT(cset) ) {
for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) {
if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) {
return cstr - str;
}
@ -382,9 +399,9 @@ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set )
ber_len_t (ldap_utf8_strspn)( const char *str, const char *set )
{
const char *cstr;
const char *cset;
for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
const char *cset;
for( cset = set; ; LDAP_UTF8_INCR(cset) ) {
if( *cset == '\0' ) {
@ -404,14 +421,13 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set )
char *(ldap_utf8_strpbrk)( const char *str, const char *set )
{
int len;
const char *cstr;
for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) {
const char *cset;
for( cset = set; ; LDAP_UTF8_INCR(cset) ) {
if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) {
return (char *) cstr;
for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) {
if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( cset ) ) {
return (char *) str;
}
}
}
@ -436,7 +452,7 @@ char *(ldap_utf8_strtok)(char *str, const char *sep, char **last)
return NULL;
}
end = &begin[ ldap_utf8_strcpn( begin, sep ) ];
end = &begin[ ldap_utf8_strcspn( begin, sep ) ];
if( *end != '\0' ) {
char *next = LDAP_UTF8_NEXT( end );