// // Copyright 2001 Perforce Software. All rights reserved. // // This file is part of Perforce - the FAST SCM System. // // p4wStrBuf: // StrBuf with the p4w-specific ::Expand() modifications. // Also include utilities for escaping and unescaping characters // in urls and html. #include <p4wp4.h> #include "p4wStrBuf.h" #include "validate.h" // ------------------------------------- // p4w's ::Expand function. // p4wStrBuf& p4wStrBuf::Expand(const StrPtr &s, StrDict &d, int isUnicode, int bForceWrap) { // // Used for GetVar(var) return DoExpand(s, d, 0, 0, 0, isUnicode, bForceWrap); } p4wStrBuf& p4wStrBuf::ExpandList(const StrPtr &s, StrDict &d, int x, int isUnicode, int bForceWrap) { // // Used for GetVar(var, x) to get values from a list return DoExpand(s, d, 1, x, 0, isUnicode, bForceWrap); } p4wStrBuf& p4wStrBuf::ExpandArray(const StrPtr &s, StrDict &d, int x, int y, int isUnicode) { // // Used for GetVar(var, x, y) to get values from a list of a list item return DoExpand(s, d, 2, x, y, isUnicode); } p4wStrBuf& p4wStrBuf::DoExpand(const StrPtr &s, StrDict &d, int nargs, int x, int y, int isUnicode, int bForceWrap) { // General vars. StrBuf var; // the %var% variable StrPtr *val; // var's value const char *p = s.Text(); // our current location in s const char *start; // a pointer to the first % in a pair const char *end; // a pointer to the last % in a pair const char *option_start; // a pointer to the first option in a pair // // Search through the string until we either run out of string or // find the last %. while( (start = strchr(p, '%')) != NULL ) { // // Locate the terminating %. end = strchr(start + 1, '%'); if( end == NULL ) { // // No terminating %; this is an error, but we'll exit // gracefully. break; } else if( end == start + 1 ) { // // %% found; insert a single %. Append( p, end - p ); p = end + 1; } else { // // We found a complete %var% token. Extract the variable name. 
// First, we need to figure out where the variable name ends and // any options begin. if( (option_start = (char *)memchr(start + 1, '@', end - start - 1)) == NULL ) if( (option_start = (char *)memchr(start + 1, '?', end - start - 1)) == NULL ) option_start = end; // // Add everything before the start of the pair to our buffer and // update our pointer so that it points after this variable. Append(p, start - p); p = end + 1; // // Get the value for this variable. var.Set(start + 1, option_start - start - 1); switch( nargs ) { case 0: // default case val = d.GetVar(var); break; case 1: // with 1 int arg val = d.GetVar(var, x); break; case 2: // with 2 int args val = d.GetVar(var, x, y); break; } // // If we didn't get a value, see if we were given a '?' option // and use that instead. if( val == NULL ) { if( (option_start = (char *)memchr(option_start, '?', end - option_start)) != NULL ) Append(option_start + 1, end - option_start - 1); continue; } // // We were given a value. See if we have to process any options // for it. switch( *option_start ) { case '%': // append and be done. EscapeHTML(*val, isUnicode, bForceWrap); break; case '@': // convert time. // // Start a new time_format buffer. StrBuf time_format; char time_buffer[255 + 1]; time_t t; // // Convert all *'s to %'s. Convert **'s to *'s while // we're at it. for( const char * time_p = option_start + 1; time_p != end; time_p++ ) { // // Is this a *? if( *time_p == '*' ) { // Does it have a trailing *? if( (time_p + 1 != end) && (*(time_p + 1) == '*') ) { time_format.Append("*"); time_p++; // skip the trailing * } else time_format.Append("%"); } else { time_format.Append(time_p, 1); } } // // strftime() t = atoi(val->Text()); if( t < 0 ) break; if( strftime(time_buffer, sizeof(time_buffer), time_format.Text(), localtime(&t)) != 0 ) Append(time_buffer); break; } } } // // Append the rest of the string to our buffer and return the buffer. 
Append(p); return *this; } // helper function to assure the pointer is looking at a utf-8 follow-on char bool utf8Next(const char* p) { // should be 10xxxxxx return (((unsigned char)(*p)) & 0xC0) == 0x80; } // return the UTF-8 value and adjust p for the number of extra bytes we consumed int decodeUtf8(const char* &p) { unsigned char uP = *p; if (uP < 192) return uP; int ret = 0; // in utf-8 number of leading 1's indicates byte count // 110x xxxx >= 192 // 1110 xxxx >= 224 // 1111 0xxx >= 240 // the rest of the bytes are all 10xx xxxx if (uP >= 192 && uP < 224) { if (!utf8Next(p+1)) return uP; // 2 bytes - 110yyyxx 10xxxxxx ret = (((*p ) & 0x1F) << 6) + (*(p+1) & 0x3F); p += 1; } else if (uP < 240) { if (!utf8Next(p+1) || !utf8Next(p+2)) return uP; // 3 bytes - 1110yyyy 10yyyyxx 10xxxxxx ret = (((*p ) & 0x0F) << 12) + ((*(p+1) & 0x3F) << 6) + (*(p+2) & 0x3F); p += 2; } else { if (!utf8Next(p+1) || !utf8Next(p+2) || !utf8Next(p+3)) return uP; // 4 bytes - 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx ret = (((*p ) & 0x07) << 18) + ((*(p+1) & 0x3F) << 12) + ((*(p+2) & 0x3F) << 6) + (*(p+3) & 0x3F); p += 3; } return ret; } // ------------------------------------- // Escape <>&'s to > < and & // p4wStrBuf& p4wStrBuf::EscapeHTML(const StrPtr &s, int isUnicode, int bForceWrap) { bool validUtf8 = false; if (!isUnicode) { CharSetUTF8Valid validator; validUtf8 = (validator.Valid(s.Text(), s.Length()) == 1); } // // Search for, and escape, <>'s. int i=0; for( const char * p = s.Text(); *p != '\0'; p++, i++ ) { unsigned char first = *p; // // See if we need to escape this character. 
if( first == '<' ) { Append("<"); } else if( first == '>' ) { Append(">"); } else if( first == '&' ) { Append("&"); } else if( first == '"' ) { Append("""); } else if( first == '\'' ) { Append("'"); } else if( first == '/' ) { Append("/"); } else if( bForceWrap && first == '\n' ) { Append("<br>"); } else if ( first > 0x7F && !isUnicode ) { // // If it is outside of ascii, just use the numeric // value // Append( "&#" ); // for security reasons, only decode valid Utf8 if (validUtf8) *this << (unsigned int)decodeUtf8(p); else *this << (unsigned int)first; Append( ";" ); } else { Append(p, 1); } } // // We're done. return *this; } // ------------------------------------- // Escape <>&'s to > < and & and also convert "http://" // into real urls using "a href". // p4wStrBuf& p4wStrBuf::EscapeHTMLDoURLs(const StrPtr &s, int unicode) { bool validUtf8 = false; if (!unicode) { CharSetUTF8Valid validator; validUtf8 = (validator.Valid(s.Text(), s.Length()) == 1); } // // Search for, and escape, <>'s. If "http:" is found, // convert it to a real url StrBuf urlText; // // Use a lower-case copy of this string in order to search for // "http:" using case-sensitive match. StrBuf lowered; lowered.Set(s.Text()); StrOps::Lower(lowered); const char *pl = lowered.Text(); for( const char * p = s.Text(); *p != '\0'; p++, pl++ ) { unsigned char first = *p; // // See if we need to escape this character. if( first == '<' ) { Append("<"); } else if( first == '>' ) { Append(">"); } else if( first == '&' ) { Append("&"); // // Handle the case where we've found "http: or http: // or "https: or https: // by wrapping it within <A Href> directives to make it // an url } else if( ( ( first == '"' ) && ( !strncmp( pl, "\"http:", 6 ) || !strncmp( pl, "\"https:", 7 ) ) ) || ( ( *pl == 'h' ) && ( !strncmp( pl, "http:", 5 ) || !strncmp( pl, "https:", 6 ) ) ) ) { int fq = 0; int eq = 0; int l; if( first == '"' ) fq = 1; // // Found "http:" or "https:". 
Insure we handle case // correctly whether the url part is quoted or not. if( fq ) { if( !strncmp( pl, "\"http:", 6 ) ) l = 6; else l = 7; Append("<a href="); } else { if( !strncmp( pl, "http:", 5 ) ) l = 5; else l = 6; Append("<a href=\""); } Append(p, l); urlText.Set(p, l); p += l; pl += l; // // Look for the end of the url. It can be // terminated by a quotation mark if it was // started with one. Otherwise, it can be // terminated by a space, non-ascii, or // newline character. const char *t = p; int ap = 0; for( ; *t != '\0'; t++ ) { first = *t; if( fq && first == '"' ) { eq = 1; urlText.Append(t, 1); break; } if( !fq && first == ' ' ) { ++ap; break; } if( first > 0x7F && !unicode ) { ++ap; break; } if( first == '\n' ) { ++ap; break; } Append(t, 1); urlText.Append(t, 1); } // // url has ended so end the <a href>, output // the text, and generate the </a>. This should // finish the html needed to generate this url. Append("\">"); Append(urlText.Text()); Append("</a>"); if( *t == '\0' ) break; // // Don't forget to output the character that terminated // the url, after the url has been generated. if( ap ) Append(t, 1); pl += ( t - p ); p = t; } else if( first == '"' ) { Append("""); } else if ( first > 0x7F && !unicode ) { // // If it is outside of ascii, just use the numeric // value unless this is unicode mode // Append("&#"); const char* origP = p; // for security reasons, only decode valid Utf8 if (validUtf8) *this << (unsigned int)decodeUtf8(p); else *this << (unsigned int)first; pl += p - origP; Append(";"); } else { Append(p, 1); } } // // We're done. return *this; } // ------------------------------------- // Escape and unescape URL's. // p4wStrBuf& p4wStrBuf::EscapeURL(const StrPtr &s, int isUnicode) { // // Escape a subset of the reserved and unsafe // characters in urls. We do not escape the // following characters because we want them // to retain their special characteristics: // :?@=&#\. 
const char * q; char val[4]; // Reserved and unsafe characters. static const char * reservedChars = ";"; static const char * reservedCharsEncoding[] = { "%3B" }; static const char * unsafeChars = "<>\"%{}|^~[]`"; static const char * unsafeCharsEncoding[] = { "%3C", "%3E", "%22", "%25", "%7B", "%7D", "%7C", "%5E", "%7E", "%5B", "%5D", "%60" }; // // Search for, and escape, reserved and unsafe characters. // Note that a leading space should also be escaped, even // though it is not strictly considered reserved nor unsafe, // otherwise it won't work correctly when combined with base. int first = 1; for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( first && *p == ' ' ) { Append("%20"); } else if( (q = strchr(reservedChars, *p)) != NULL ) { Append(reservedCharsEncoding[q - reservedChars]); } else if( (q = strchr(unsafeChars, *p)) != NULL ) { Append(unsafeCharsEncoding[q - unsafeChars]); } else if( (unsigned char)*p > 127 && !isUnicode ) { sprintf(val, "%%%x", (unsigned char)*p); Append(val); } else { Append(p, 1); } first = 0; } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::EscapeSpaces(const StrPtr &s) { // // Search for, and escape, spaces. for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( *p == ' ' ) { Append("%20"); } else { Append(p, 1); } } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::EscapeURLAllChars(const StrPtr &s, int isUnicode) { // // Escape all reserved and unsafe // characters in urls. This differs from EscapeURL // in that we don't exclude escaping characters // which are special for p4web. const char * q; char val[4]; // Reserved and unsafe characters. 
static const char * reservedChars = ";/?:@=&"; static const char * reservedCharsEncoding[] = { "%3B", "%2F", "%3F", "%3A", "%40", "%3D", "%26" }; static const char * unsafeChars = "<>\"\'#%{}|\\^~[]`"; static const char * unsafeCharsEncoding[] = { "%3C", "%3E", "%22", "%27", "%23", "%25", "%7B", "%7D", "%7C", "%5C", "%5E", "%7E", "%5B", "%5D", "%60" }; // // Search for, and escape, reserved and unsafe characters. // Note that a leading space should also be escaped, even // though it is not strictly considered reserved nor unsafe, // otherwise it won't work correctly when combined with base. int first = 1; for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( first && *p == ' ' ) { Append("%20"); } else if( (q = strchr(reservedChars, *p)) != NULL ) { Append(reservedCharsEncoding[q - reservedChars]); } else if( (q = strchr(unsafeChars, *p)) != NULL ) { Append(unsafeCharsEncoding[q - unsafeChars]); } else if( (unsigned char)*p > 127 && !isUnicode) { sprintf(val, "%%%x", (unsigned char)*p); Append(val); } else { Append(p, 1); } first = 0; } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::UnescapeURL(const StrPtr &s, int plus2sp) { // // Search for, and unescape, reserved and unsafe characters. for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to unescape this character. if( (*p == '%') && (*(p + 1) != '\0') && (*(p + 2) != '\0') ) { // Convert the character. char tempHex[3] = { '\0', '\0', '\0' }; memmove(tempHex, p + 1, 2); tempHex[0] = (char)strtol(tempHex, (char **)NULL, 16); Append(tempHex, 1); // Skip over it. p += 2; // the other + 1 will happen in the loop } else if(plus2sp && *p == '+') { Append(" ", 1); } else { Append(p, 1); } } // // We're done. 
return *this; } // // Escape ampersands only p4wStrBuf& p4wStrBuf::EscapeAmp(const StrPtr &s) { // // Search for, and escape, ampersands if( !strchr( s.Text(), '&' ) ) { Append( s.Text() ); return *this; } for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( *p == '&' ) { Append("%26"); } else { Append(p, 1); } } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::UnescapeAmp(const StrPtr &s) { // // Search for, and unescape escaped ampersands for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to unescape this character, // only if it is an escaped ampersand if( ( *p == '%' ) && ( *( p + 1 ) != '\0' ) && ( *( p + 2 ) != '\0' ) && ( *( p + 1 ) == '2' ) && ( *( p + 2 ) == '6' ) ) { // Convert the character char tempHex[3] = { '\0', '\0', '\0' }; memmove( tempHex, p + 1, 2 ); tempHex[0] = (char)strtol( tempHex, (char **)NULL, 16 ); Append( tempHex, 1 ); // Skip over it. p += 2; // the other + 1 will happen in the loop } else { Append( p, 1 ); } } // // We're done. return *this; } // // Escape double quotes only p4wStrBuf& p4wStrBuf::EscapeDQuotes(const StrPtr &s) { // // Search for, and escape, double quotes if( !strchr( s.Text(), '"' ) ) { Append( s.Text() ); return *this; } for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( *p == '"' ) { Append("%22"); } else { Append(p, 1); } } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::NormalizeBase(const StrPtr &s, int isUnicode) { // // Normalize base by escaping characters special to urls. 
if( !s.Length() ) return *this; Append( ( p4wStrBuf().EscapeURL( StrRef(s.Text()), isUnicode ) ).Text() ); return *this; } p4wStrBuf& p4wStrBuf::StripBlanks(const StrPtr &s) { // // Strip leading and trailing blanks from string const char *p; const char *e; if( !s.Length() ) return *this; for( p = s.Text(); *p; p++ ) { if( *p != ' ' ) break; } if( !*p ) return *this; for( e = p + strlen( p ) - 1; e > p; e-- ) { if( *e != ' ' ) break; } Append( p, e - p + 1 ); return *this; } p4wStrBuf& p4wStrBuf::EscapeP4Chars( const StrPtr &s ) { // // Escape characters which have special meaning // to p4, but are now allowed in filenames. // These characters are @,#,% and *. const char * q; char val[4]; static const char * unsafeChars = "@#%*"; static const char * unsafeCharsEncoding[] = { "%40", "%23", "%25", "%2A" }; // // Search for, and escape, p4's unsafe characters. for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to escape this character. if( ( q = strchr( unsafeChars, *p ) ) != NULL ) { Append( unsafeCharsEncoding[q - unsafeChars] ); } else { Append( p, 1 ); } } // // We're done. return *this; } p4wStrBuf& p4wStrBuf::UnescapeP4Chars(const StrPtr &s) { // // Search for, and unescape characters that have special // meaning to p4, specifically @,%, *, % for( const char * p = s.Text(); *p != '\0'; p++ ) { // // See if we need to unescape this character sequence. if( ( *p == '%' ) && ( *( p + 1 ) != '\0' ) && ( *( p + 2 ) != '\0' ) ) { if( ( *( p + 1 ) == '4' && *( p + 2 ) == '0' ) || ( *( p + 1 ) == '2' && *( p + 2 ) == '3' ) || ( *( p + 1 ) == '2' && *( p + 2 ) == '5' ) || ( *( p + 1 ) == '2' && *( p + 2 ) == 'A' ) ) { // Convert the character. char tempHex[3] = { '\0', '\0', '\0' }; memmove(tempHex, p + 1, 2); tempHex[0] = (char)strtol(tempHex, (char **)NULL, 16); Append(tempHex, 1); // Skip over it. p += 2; // the other + 1 will happen in the loop } else { Append( p, 1 ); } } else { Append( p, 1 ); } } // // We're done. return *this; }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 12234 | Matt Attaway |
Rejigger P4Web project in preparation for official sunsetting. The bin directory contains the last official builds of P4Web from the Perforce download site. P4Web is soon to be completely sunset; these builds are here for folks who don't want to build their own. To better handle the archived builds, the source code has been moved into a separate src directory. |
||
//guest/perforce_software/p4web/Main/p4wStrBuf.cpp | |||||
#1 | 8914 | Matt Attaway | Initial add of the P4Web source code |