/*
* Copyright 1995, 1996 Perforce Software. All rights reserved.
*
* This file is part of Perforce - the FAST SCM System.
*/
/*
* strings.cc - support for StrBuf, StrPtr
*/
# include <stdhdrs.h>
# include <charman.h>
# include <math.h>
# include "strbuf.h"
# include "strdict.h"
# include "strops.h"
/*
* Null strings for StrRef::Null() and empty StrBuf
*/
char StrBuf::nullStrBuf[8] = "\0\0\0\0\0\0\0";
StrRef StrRef::null( "", 0 );
/*
* StrPtr::CCompare/SCompare/SCompareN/SEqual
*
* These following methods alone handle the case sensitivity switch
* between UNIX and NT servers. They include support for an experimental
* "hybrid" form of case handling.
*
* Even the case insensitive comparators start with a case sensitive
* compare, since that is faster and handles the bulk of data. Only
* if the case sensitive compare finds a difference, and we are being
* case insensitive, do we proceed to the slower case folding compare.
*
* Here are the case handling modes:
*
* ST_UNIX: Compare with case significant.
*
* Ax < ax
* aa > AX
*
* ST_WINDOWS: Compare with case ignored.
*
* Ax == ax
* aa < AX
*
* ST_HYBRID: First compare with case ignored, then significant.
*
* Ax < ax
* aa < AX
*
* This leads to an ordering like:
*
* //depot/main/a
* //depot/main/b
* //depot/Main/c
* //depot/main/c
* //depot/main/d
*
* That is, it looks like WINDOWS order, but still maintains
* uniqueness. Because ordering depends on seeing the whole
* string before going back and looking for case differences,
* this can complicate code which attempts to do its own char
* by char sorting. Thus the character based SCompare() and
* SEqual() differ only for the HYBRID case: SCompare ignores
* the case, while SEqual() heeds it.
*
* And the methods:
*
* StrPtr::CCompare()
* Compare with case ignored.
*
* StrPtr::NCompare()
* Compare with case ignored, natural order comparison.
*
* StrPtr::SCompare( string )
* Compare with case significant (UNIX).
* Compare with case ignored (WINDOWS)
* Compare with case ignored then significant (HYBRID)
*
* StrPtr::SCompareN( string, len )
* Compare with case significant (UNIX).
* Compare with case ignored (WINDOWS)
* Compare with case ignored; if EOS then significant (HYBRID)
*
* StrPtr::SCompare( char )
* Compare with case significant (UNIX).
* Compare with case ignored (WINDOWS,HYBRID)
*
* StrPtr::SEqual( char )
* Compare with case significant (UNIX,HYBRID).
* Compare with case ignored (WINDOWS)
*/
# ifdef CASE_INSENSITIVE
StrPtr::CaseUse StrPtr::caseUse = ST_WINDOWS;
# else
StrPtr::CaseUse StrPtr::caseUse = ST_UNIX;
# endif
bool StrPtr::foldingSet = 0;
int
StrPtr::CCompare( const char *sa, const char *sb )
{
register const unsigned char *a = (const unsigned char *)sa;
register const unsigned char *b = (const unsigned char *)sb;
// Case significant part (fast)
while( *a && *a == *b )
++a, ++b;
// Case ignored part (slow)
while( *a && tolowerq( *a ) == tolowerq( *b ) )
++a, ++b;
return tolowerq( *a ) - tolowerq( *b );
}
int
StrPtr::SCompare( const char *sa, const char *sb )
{
register const unsigned char *a = (const unsigned char *)sa;
register const unsigned char *b = (const unsigned char *)sb;
// Case significant compare for UNIX
while( *a && *a == *b )
++a, ++b;
int cmpCased = *a - *b;
if( caseUse == ST_UNIX )
return cmpCased;
// Proceed with case ignored compare for WINDOWS, HYBRID
while( *a && tolowerq( *a ) == tolowerq( *b ) )
++a, ++b;
int cmpFolded = tolowerq( *a ) - tolowerq( *b );
if( cmpFolded || caseUse == ST_WINDOWS )
return cmpFolded;
// If case-ignored matched and HYBRID, now return first case mismatch
return cmpCased;
}
/*
* NCompare(), original code taken from:
*
* strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
* Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
*
* see:
* http://computer.perforce.com/newwiki/index.php?title=3rd_Party_Software
*
*/
int
StrPtr::NCompare( const char *a, const char *b )
{
const unsigned char *ca = (const unsigned char *)a;
const unsigned char *cb = (const unsigned char *)b;
int result;
for( ;; )
{
while( isAspace( ca ) ) ++ca;
while( isAspace( cb ) ) ++cb;
if( !*ca && !*cb )
return 0;
if ( isAdigit( ca ) && isAdigit( cb ) )
{
if( *ca == '0' || *cb == '0' )
{
if( ( result = NCompareLeft( ca, cb ) ) != 0 )
return result;
}
else
{
if( ( result = NCompareRight( ca, cb ) ) != 0 )
return result;
}
}
// For now, always ignore case
if( tolowerq( *ca ) < tolowerq( *cb ) ) return -1;
if( tolowerq( *ca ) > tolowerq( *cb ) ) return +1;
++ca; ++cb;
}
}
int
StrPtr::NCompareLeft( const unsigned char *a, const unsigned char *b )
{
for( ;; a++, b++ )
{
if ( !isAdigit( a ) && !isAdigit( b ) )
return 0;
else if( !isAdigit( a ) )
return -1;
else if( !isAdigit( b ) )
return +1;
else if( *a < *b )
return -1;
else if( *a > *b )
return +1;
}
// NOT-REACHED!
}
int
StrPtr::NCompareRight( const unsigned char *a, const unsigned char *b )
{
int bias = 0;
for( ;; a++, b++ )
{
if( !isAdigit( a ) && !isAdigit( b ) )
return bias;
else if( !isAdigit( a ) )
return -1;
else if( !isAdigit( b ) )
return +1;
else if( *a < *b )
{ if( !bias ) bias = -1; }
else if (*a > *b)
{ if( !bias ) bias = +1; }
else if ( !*a && !*b )
return bias;
}
// NOT-REACHED!
}
int
StrPtr::SCompareN( const StrPtr &s ) const
{
register const unsigned char *a = (const unsigned char *)buffer;
register const unsigned char *b = (const unsigned char *)s.buffer;
register int n = length;
// Case significant compare for UNIX
while( n && *a && *a == *b )
++a, ++b, --n;
if( !n )
return 0;
int cmpCased = *a - *b;
if( caseUse == ST_UNIX )
return cmpCased;
// Proceed with case folding compare for WINDOWS, HYBRID
while( n && *a && tolowerq( *a ) == tolowerq( *b ) )
++a, ++b, --n;
if( !n )
return 0;
int cmpFolded = tolowerq( *a ) - tolowerq( *b );
if( cmpFolded || caseUse == ST_WINDOWS )
return cmpFolded;
// If case-ignored matched and HYBRID, now return first case mismatch
return cmpCased;
}
int
StrPtr::SCompareF( unsigned char a, unsigned char b )
{
return caseUse == ST_UNIX ? a-b : tolowerq( a ) - tolowerq( b );
}
int
StrPtr::SEqualF( unsigned char a, unsigned char b )
{
return caseUse == ST_WINDOWS ? tolowerq( a ) == tolowerq( b ) : a==b;
}
/*
* StrPtr::Atoi64() - our own strtoll()
* StrPtr::Itoa64() - a cheesy sprintf()
*/
P4INT64
StrPtr::Atoi64( const char *p )
{
P4INT64 value = 0;
int sign = 0;
/* Skip any leading blanks. */
while( isAspace( p ) )
++p;
/* Check for a sign. */
if( *p == '+' )
++p;
else if( *p == '-' )
++p, sign = 1;
/* Convert the digits */
while( isAdigit( p ) )
value = value * 10 + ( *p++ - '0' );
/* Add the sign */
return sign ? -value : value;
}
char *
StrPtr::Itoa64( P4INT64 v, char *buffer )
{
// Our own cheesy sprintf( buf, "%d", v );
// We work backwards from the end of the buffer.
// We return the beginning of the buffer
int neg;
if( neg = ( v < 0 ) )
v = -v;
*--buffer = 0;
do
{
*--buffer = ( v % 10 ) + '0';
v = v / 10;
}
while( v );
if( neg )
*--buffer = '-';
return buffer;
}
char *
StrPtr::Itox( unsigned int v, char *buffer )
{
// Our own cheesy sprintf( buf, "%x", v );
// We work backwards from the end of the buffer.
// We return the beginning of the buffer
*--buffer = 0;
do
{
*--buffer = StrOps::OtoX( v & 0xf );
v >>= 4;
}
while( v );
*--buffer = 'x';
*--buffer = '0';
return buffer;
}
// A legal number follows the simple grammar: ( )*[+-]\d+
//
bool
StrPtr::IsNumeric() const
{
const char *p = Text();
while( isAspace( p ) ) ++p;
if( *p == '+' || *p == '-' ) ++p;
const char *q = p;
while( isAdigit( p ) ) ++p;
return *p == '\0' && ( p - q ) > 0;
}
int
StrPtr::EndsWith( const char *s, int l ) const
{
if( Length() < l )
return 0;
const char *p = Text() + ( Length() - l );
while( l-- > 0 )
if( *p++ != *s++ )
return 0;
return 1;
}
/*
* StrBuf::Append() - append to a buffer
*/
void
StrBuf::Append( const char *buf, p4size_t len )
{
// This code is the functional equivalent of:
//
// Extend( buf, len );
// Terminate();
//
char *s = Alloc( len + 1 );
memmove( s, buf, len );
s[ len ] = '\0';
--length;
}
void
StrBuf::Append( const char *buf )
{
// Use buf's EOS
int len = strlen( buf ) + 1;
memmove( Alloc( len ), buf, len );
--length;
}
void
StrBuf::Append( const StrPtr *t )
{
char *s = Alloc( t->Length() + 1 );
memmove( s, t->Text(), t->Length() );
s[ t->Length() ] = '\0';
--length;
}
void
StrBuf::UAppend( const char *buf, p4size_t len )
{
// This code is the functional equivalent of:
//
// Extend( buf, len );
// Terminate();
//
char *s = Alloc( len + 1 );
memcpy( s, buf, len );
s[ len ] = '\0';
--length;
}
void
StrBuf::UAppend( const char *buf )
{
// Use buf's EOS
int len = strlen( buf ) + 1;
memcpy( Alloc( len ), buf, len );
--length;
}
void
StrBuf::UAppend( const StrPtr *t )
{
char *s = Alloc( t->Length() + 1 );
memcpy( s, t->Text(), t->Length() );
s[ t->Length() ] = '\0';
--length;
}
/*
* StrBuf::Grow() - grow a string buffer
*/
void
StrBuf::Grow( p4size_t oldlen )
{
// Resize?
// If Growing an existing string, double needed size ('cause
// we'll grow again). But if allocating a string to begin
// with, add an extra byte for the null terminator that gets
// tacked on.
size = length;
if( buffer != nullStrBuf )
{
// geometric growth
if( size >= 0x70000000 )
size = 0xffffffff;
else
size = ( size + 30 ) * 3 / 2;
char *o = buffer;
buffer = new char[ size ];
memcpy( buffer, o, oldlen );
delete []o;
}
else
{
if( size < 4096 )
size += 1;
buffer = new char[ size ];
}
}
/*
* StrBuf::BlockAppend() - append a large block to a buffer
*/
void
StrBuf::BlockAppend( const char *buf, p4size_t len )
{
// This code is the functional equivalent of:
//
// Extend( buf, len );
// Terminate();
//
// except that it doesn't over-allocate
char *s = BlockAlloc( len + 1 );
memmove( s, buf, len );
s[ len ] = '\0';
--length;
}
void
StrBuf::BlockAppend( const char *buf )
{
// Use buf's EOS
int len = strlen( buf ) + 1;
memmove( BlockAlloc( len ), buf, len );
--length;
}
void
StrBuf::BlockAppend( const StrPtr *t )
{
char *s = BlockAlloc( t->Length() + 1 );
memmove( s, t->Text(), t->Length() );
s[ t->Length() ] = '\0';
--length;
}
void
StrBuf::UBlockAppend( const char *buf, p4size_t len )
{
// This code is the functional equivalent of:
//
// Extend( buf, len );
// Terminate();
//
// except that it doesn't over-allocate
char *s = BlockAlloc( len + 1 );
memcpy( s, buf, len );
s[ len ] = '\0';
--length;
}
void
StrBuf::UBlockAppend( const char *buf )
{
// Use buf's EOS
int len = strlen( buf ) + 1;
memcpy( BlockAlloc( len ), buf, len );
--length;
}
void
StrBuf::UBlockAppend( const StrPtr *t )
{
char *s = BlockAlloc( t->Length() + 1 );
memcpy( s, t->Text(), t->Length() );
s[ t->Length() ] = '\0';
--length;
}
/*
* StrBuf::Reserve() - Ensure that there is the requested space
* - don't over-allocate
* - intended for large requests (>= 128 KB)
*/
void
StrBuf::Reserve( p4size_t oldlen )
{
size = length;
if( buffer != nullStrBuf )
{
// geometric growth
char *o = buffer;
buffer = new char[ size ];
memcpy( buffer, o, oldlen );
delete []o;
}
else
{
buffer = new char[ size ];
}
}
void
StrBuf::Fill(const char *buf, p4size_t len)
{
if (len > Length() )
len = Length();
memset( Text(), *buf, len);
}
void
StrBuf::TruncateBlanks()
{
const char *p = buffer;
const char *blank = 0;
while( *p )
{
if( *p != ' ' )
blank = 0;
else if( !blank )
blank = p;
p++;
}
if( blank )
{
SetEnd( (char *)blank );
Terminate();
}
}
void
StrBuf::TrimBlanks()
{
const char *start = buffer;
const char *blank = 0;
while( *start == ' ' )
start++;
const char *p = start;
while( *p )
{
if( *p != ' ' )
blank = 0;
else if( !blank )
blank = p;
p++;
}
int l = blank ? blank - start : p - start;
if( l != length )
{
memmove( buffer, start, l );
buffer[ l ] = '\0';
length = l;
}
}
/**
* StrBuf::CompressTail
*
* @brief Chop the common substring off tail and
* prepend with the offset value of the substring
* in the passed string.
*
* @param the string to match
* @return offset into string argument
* -1 if error
* 0 if not compressed
*/
int
StrBuf::EncodeTail( StrPtr &s, const char *replaceBytes )
{
/*
* If replaceBytes is NULL, then ASSUME we shall compress
* the string. If replaceBytes is not NULL and the contents
* does not match the first two bytes of our buffer (e.g. "//")
* then we return assuming that the buffer has already been
* compressed. (This happens in DbOpen::Put since the record
* is written in two parts, the keys then the non key columns).
* To compress our string, compare a string with a supplied argument s.
* Find the count of trailing identical characters then
* truncate the common substring from the end. The
* get the offset value for the common string into the supplied
* argument, convert to 2-byte hex value and prepend to the leading
* non-matching piece. (max offset 255 for FF)
*/
// job075871: advance past the client name so that substring
// is not considered in the match process.
register int i = 2;
while( i < s.Length() && s.buffer[i] != '/' )
i++;
if( s.buffer[i] != '/' )
return 0;
register int shortest = length < (s.Length() - i)? length : (s.Length() - i);
// bail early if one string is empty
if( shortest == 0 )
return 0;
// reset counter
i = 0;
register const unsigned char *a = (const unsigned char *)(s.buffer + s.Length() - 1);
register const unsigned char *b = (const unsigned char *)(buffer + length - 1);
if( replaceBytes && strncmp(buffer, replaceBytes, 2) != 0)
return 0;
while( i < shortest && *a == *b )
{
a--;
b--;
i++;
}
// sanity check
if ( i > (length - 2) )
{
// If client path contains full depot path
// back off a character so that we have enough
// room to store offset.
if( i == (length - 1) )
i--;
else
return -1;
}
/*
* get the offset value into the passed StrPtr where the
* common substring starts.
*/
register int v = s.Length() - i;
if ((i == 0) || (v > 255) )
{
// nothing to compress or
// the offset is too large do not compress the tail
return 0;
}
// chop off common part of path
length -= i;
Terminate();
// prepend this value to the first 2 bytes of our buffer
*(buffer+1) = StrOps::OtoX( v & 0xf ); v >>= 4;
*(buffer) = v ? StrOps::OtoX( v & 0xf ) : '0';
return (s.Length() - i);
}
int
StrBuf::DecodeTail( StrPtr &s, const char *replaceBytes )
{
/*
* If replaceBytes is NULL, then ASSUMES a compressed string.
* If replaceBytes is not NULL and contains the
* identical first two bytes of our buffer (e.g. "//")
* then we return assuming that the buffer is uncompressed.
* This StrBuf will may grow to accommodate the common
* substring which will be appended. If replacedBytes
* is non-NULL then the first two bytes will replace
* those currently in the buffer.
*/
/* Sanity Checks */
// bail early if target string is empty
if( s.Length() == 0 )
return -1;
// bail if not enough room for offset in source
if( length < 2 )
return 0;
// bail if passed replacement string and it has already been replaced
if( replaceBytes && strlen(replaceBytes) >= 2 && strncmp(buffer, replaceBytes, 2) == 0)
return 0;
int v = ( StrOps::XtoO( buffer[0] ) << 4 )
| ( StrOps::XtoO( buffer[1] ) << 0 );
// santity check offset value
if( v <= 2 || v > 255 )
return -1;
if( replaceBytes && strlen(replaceBytes) >= 2 )
{
buffer[0] = replaceBytes[0];
buffer[1] = replaceBytes[1];
}
if( s.Length() >= v )
Append( s.buffer + v );
else
return -1;
return v;
}
void
StrBuf::Compress( StrPtr *s )
{
// Compare a string with a supplied argument
// Find the count of leading identical characters then
// convert to 2-byte hex value and append the trailing
// non-matching piece. (max 255 for FF)
register const unsigned char *a = (const unsigned char *)buffer;
register const unsigned char *b = (const unsigned char *)s->buffer;
register int n = length;
register int i = 0;
while( n && *a && *a == *b && ++i < 256 )
++a, ++b, --n;
// leading characters in common (max 255)
int v = ( length - n );
int newSize = (length - v) + 4;
int l = v;
char *newBuffer = new char[ newSize ];
*(newBuffer+1) = StrOps::OtoX( v & 0xf ); v >>= 4;
*(newBuffer) = v ? StrOps::OtoX( v & 0xf ) : '0';
memcpy( newBuffer + 2, buffer + l, n );
newBuffer[ n + 2 ] = '\0';
delete []buffer;
buffer = newBuffer;
length = n + 2;
size = newSize;
}
void
StrBuf::UnCompress( StrPtr *s )
{
// ASSUMES a compressed string, note uncompressed size
// could be much bigger since Alloc uses Grow(), but
// typically this strbuf will be copied into another
// record format where it will be correctly sized.
register int n = length;
int v = ( StrOps::XtoO( buffer[0] ) << 4 )
| ( StrOps::XtoO( buffer[1] ) << 0 );
int extra = v - 2;
if( extra > 0 )
Alloc( extra + 1 );
memmove( buffer + v, buffer + 2, n - 2 );
memcpy( buffer, s->buffer, v );
buffer[ n + extra ] = '\0';
length = n + extra;
}
void
StrFixed::SetBufferSize( p4size_t l )
{
if( length == l )
return;
delete []buffer;
this->length = l;
this->buffer = new char[ l ];
}
/*
* Converts a 64-bit integer to a "human-readable" format. Is passed the
* end of the buffer, works backward, returns the beginning of the buffer.
*/
char *
StrHuman::Itoa64( P4INT64 v, char *buffer, int f )
{
static const char suffix[] = "BKMGTP";
P4INT64 factor = f;
P4INT64 current = v;
int remainder = 0;
const char *s = suffix;
for( ; s < suffix + 6; s++)
{
if( current < factor )
break;
else
{
remainder = (current * 100 / factor ) % 100;
current = current / factor;
}
}
// remainder is a percentage (0 .. 99). Now round that to the nearest
// 10, and if we get a 10, carry that up to increment current.
# if OS_NT
// Visual Studio versions prior to VS2013 don't have "round"...
remainder = (int)floor( ( remainder / (double)10 ) + 0.5 );
# else
remainder = (int)round( remainder / (double)10 );
# endif
*--buffer = 0;
*--buffer = *s;
if( remainder == 10 )
current++;
else if( remainder )
{
do
{
*--buffer = ( remainder % 10 ) + '0';
remainder = remainder / 10;
}
while( remainder );
*--buffer = '.';
}
do
{
*--buffer = ( current % 10 ) + '0';
current = current / 10;
}
while( current );
return buffer;
}