/*
 * Copyright 1995, 2001 Perforce Software.  All rights reserved.
 *
 * This file is part of Perforce - the FAST SCM System.
 */

/*
 * CharMan - Character manipulation support for i18n environments
 */

# include "charman.h"
# include "i18napi.h"
# include "charcvt.h"

/*
 * CharStep::Next() - single-byte stepping.
 *
 * Advances the position by exactly one byte and returns the new position.
 */

char *
CharStep::Next()
{
    return ++ptr;
}

/*
 * CharStep::CountChars() - count characters between the current
 * position and 'e'.
 *
 * Steps forward with Next() until the position is no longer below 'e',
 * counting one character per step.  Returns 0 if the position is
 * already at or beyond 'e'.  The stepper is advanced as a side effect.
 */

int
CharStep::CountChars( char *e )
{
    int ret = 0;

    if( Ptr() < e )
    {
        do {
            ++ret;
        } while( Next() < e );
    }

    return ret;
}

/*
 * CharStep::Next( cnt ) - advance by up to 'cnt' characters.
 *
 * Stops early as soon as a step lands on a NUL byte.  Returns the
 * resulting position.
 */

char *
CharStep::Next( int cnt )
{
    while( cnt-- > 0 && *Next() )
        ;

    return Ptr();
}

/*
 * CharStepUTF8::Next() - step over one UTF-8 encoded character.
 *
 * The number of trail bytes is taken from the value of the lead byte.
 */

char *
CharStepUTF8::Next()
{
    int c = 0xff & *ptr;
    int trail = 0;

    /*
     * Note that if we see a starting byte like 0b10xxxxxx
     * we're in the middle of a UTF-8 sequence, which should
     * not happen but may if we're not really dealing with UTF-8.
     * We could scan forward until the next valid UTF-8 char
     * is seen, but we'll instead skip byte by byte in
     * an attempt to handle other character sets with some
     * chance of success as well as not to overrun the buffer.
     * Also, the invalid 0xfe and 0xff are advanced only one byte.
     */

    if( c >= 192 )
    {
        if( c <= 223 )
            trail = 1;
        else if( c <= 239 )
            trail = 2;
        else if( c <= 247 )
            trail = 3;
        else if( c <= 251 )
            trail = 4;
        else if( c <= 253 )
            trail = 5;
    }

    /*
     * Walk the trail bytes one at a time rather than jumping over
     * them: a truncated sequence at the end of the string must not
     * carry us past the terminating NUL.  This matches what the other
     * multi-byte steppers in this file do.
     */

    while( trail-- > 0 )
    {
        if( *++ptr == 0 )
            return ptr;
    }

    return ++ptr;
}

/*
 * CharStepShiftJis::Next() - step over one Shift-JIS character.
 *
 * Bytes 129..159 and 224..239 are treated as the first byte of a
 * two-byte character; everything else is one byte.  If the second
 * byte is NUL the position is left on that NUL.
 */

char *
CharStepShiftJis::Next()
{
    int c = 0xff & *ptr;

    if( c >= 129 && c <= 239 && ( c <= 159 || c >= 224 ) )
    {
        if( *++ptr == 0 )
            return ptr;
    }

    return ++ptr;
}

/*
 * CharStepEUCJP::Next() - step over one EUC-JP character.
 *
 * Bytes 161..254 and byte 142 start a two-byte character; byte 143
 * starts a three-byte character; everything else is one byte.
 * Stepping stops on a NUL found inside a multi-byte character.
 */

char *
CharStepEUCJP::Next()
{
    int c = 0xff & *ptr;

    if( ( c >= 161 && c <= 254 ) || c == 142 )
    {
        if( *++ptr == 0 )
            return ptr;
    }
    else if( c == 143 )
    {
        if( *++ptr == 0 )
            return ptr;
        if( *++ptr == 0 )
            return ptr;
    }

    return ++ptr;
}

/*
 * CharStepCP949::Next() - step over one CP949 character.
 *
 * Bytes 0x81..0xfd, except 0xc9, start a two-byte character;
 * everything else is one byte.  If the second byte is NUL the
 * position is left on that NUL.
 */

char *
CharStepCP949::Next()
{
    int c = 0xff & *ptr;

    if( c >= 0x81 && c <= 0xfd && c != 0xc9 )
    {
        if( *++ptr == 0 )
            return ptr;
    }

    return ++ptr;
}

/*
 * CharStepCN::Next() - step over one character in a double-byte
 * character set whose lead bytes are 0x81..0xfe.
 *
 * If the second byte is NUL the position is left on that NUL.
 */

char *
CharStepCN::Next()
{
    int c = 0xff & *ptr;

    if( c >= 0x81 && c <= 0xfe )
    {
        if( *++ptr == 0 )
            return ptr;
    }

    return ++ptr;
}

/*
 * CharStep::Create() - build the stepper matching a character set.
 *
 * 'p' is the starting position handed to the stepper's constructor and
 * 'charset' is interpreted as a CharSetCvt::CharSet value.  Character
 * sets without a dedicated stepper get the single-byte CharStep.
 *
 * The object is allocated with new; presumably the caller is
 * responsible for deleting it - confirm against callers.
 *
 * NOTE(review): no case here selects CharStepCN, so within this file
 * that class is never instantiated.  Confirm whether a case for the
 * corresponding CharSetCvt value is missing.
 */

CharStep *
CharStep::Create( char * p, int charset )
{
    switch( (CharSetCvt::CharSet) charset )
    {
    case CharSetCvt::SHIFTJIS:
        return new CharStepShiftJis( p );
    case CharSetCvt::UTF_8:
        return new CharStepUTF8( p );
    case CharSetCvt::EUCJP:
        return new CharStepEUCJP( p );
    case CharSetCvt::CP949:
        return new CharStepCP949( p );
    default:
        return new CharStep( p );
    }
}