/** * Copyright (c) 2008 Perforce Software. All rights reserved. */ package com.perforce.p4java.server; import java.nio.charset.Charset; import java.util.HashMap; import java.util.Map; /** * Encapsulates Perforce-wide charset information for servers. * <p> * * Unfortunately, support for Unicode and associated charsets is somewhat * server- and installation-dependent, and not easy to divine unless you're * already talking to a Perforce server -- by which time it's too late. This * class is an attempt to make it easier to cut that Gordian knot... * <p> * * Note that the names below are not actually the standard name for the charset * in some cases: e.g. UTF-8 should be "utf-8" not "utf8", but we follow the * Perforce server's rules here. * * */ public class PerforceCharsets { /** * <pre> * auto (Guess a P4CHARSET based on client OS params) * none (same as unsetting P4CHARSET) * utf8 (UTF-8) * iso8859-1 (ISO-8859-1, Latin Alphabet No. 1) * utf16-nobom (UTF-16 client's byte ordering without Byte-Order-Mark) * shiftjis (Windows Japanese - MS932) * eucjp (JISX 0201, 0208 and 0212, EUC encoding Japanese) * winansi (Windows code page 1252) * winoem (Windows code page 437) * macosroman (Macintosh Roman) * iso8859-15 (ISO-8859-15, Latin Alphabet No. 9) * iso8859-5 (ISO-8859-5, Latin/Cyrillic Alphabet) * koi8-r (KOI8-R, Russian) * cp1251 (Windows code page 1251 - Cyrillic) * utf16le (UTF-16 with little endian byte ordering) * utf16be (UTF-16 with big endian byte ordering) * utf16le-bom (UTF-16 with little endian Byte-Order-Mark) * utf16be-bom (UTF-16 with big endian Byte-Order-Mark) * utf16 (UTF-16 with client's byte ordering and Byte-OrderMark) * utf8-bom (UTF-8 with Byte-Order-Mark) * utf32-nobom (UTF-32 client's byte ordering without Byte-Order-Mark) * utf32le (UTF-32 with little endian byte ordering) * utf32be (UTF-32 with big endian byte ordering) * utf32le-bom (UTF-32 with little endian Byte-Order-Mark) * utf32be-bom (UTF-32 with big endian Byte-Order-Mark) * utf32 (UTF-32 with client's byte ordering and Byte-OrderMark) * utf8unchecked (UTF-8 unchecked) * utf8unchecked-bom (UTF-8 unchecked with Byte-Order-Mark) * cp949 (Windows code page 949 - Korean) * cp936 (Windows code page 936 - Simplified Chinese) * cp950 (Windows code page 950 - Traditional Chinese) * cp850 (Windows code page 850 - MS-DOS Latin-1) * cp858 (Windows code page 858 - Variant of Cp850 with Euro character) * cp1253 (Windows code page 1253 - Windows Greek) * cp737 (Windows code page 737 - PC Greek) * iso8859-7 (ISO-8859-7, Latin/Greek Alphabet) * </pre> */ private static final String[] knownCharsets = { "auto", "none", "utf8", "iso8859-1", "utf16-nobom", "shiftjis", "eucjp", "winansi", "winoem", "macosroman", "iso8859-15", "iso8859-5", "koi8-r", "cp1251", "utf16le", "utf16be", "utf16le-bom", "utf16be-bom", "utf16", "utf8-bom", "utf32-nobom", "utf32le", "utf32be", "utf32le-bom", "utf32be-bom", "utf32", "utf8unchecked", "utf8unchecked-bom", "cp949", "cp936", "cp950", "cp850", "cp858", "cp1253", "cp737", "iso8859-7" }; private static final Map<String, String> p4ToJavaCharsets; static { // Conversions from p4 charsets to Java charsets take from from // http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html p4ToJavaCharsets = new HashMap<String, String>(); int count = 0; // auto p4ToJavaCharsets.put(knownCharsets[count++], "auto"); // none p4ToJavaCharsets.put(knownCharsets[count++], "none"); // utf8 p4ToJavaCharsets.put(knownCharsets[count++], "UTF-8"); // iso8859-1 p4ToJavaCharsets.put(knownCharsets[count++], "ISO-8859-1"); // utf16-nobom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16"); // shiftjis (JDK Shift_JIS charset is NOT the same as Perforce shiftjis) // Perforce shiftjis (MS932) is a superset of Shift_JIS (SJIS). //p4ToJavaCharsets.put(knownCharsets[count++], "Shift_JIS"); // shiftjis (Perforce implementation of Microsoft code page 932) // P4-ShiftJIS is a charset wrapped around the JDK MS932 charset, // with some Perforce specific updates. // Note: Perforce shiftjis is suppose to be a full MS932 implementation. p4ToJavaCharsets.put(knownCharsets[count++], "P4-ShiftJIS"); // eucjp p4ToJavaCharsets.put(knownCharsets[count++], "EUC-JP"); // winansi p4ToJavaCharsets.put(knownCharsets[count++], "windows-1252"); // winoem p4ToJavaCharsets.put(knownCharsets[count++], "IBM437"); // macosroman p4ToJavaCharsets.put(knownCharsets[count++], "x-MacRoman"); // iso8859-15 p4ToJavaCharsets.put(knownCharsets[count++], "ISO-8859-15"); // iso8859-5 p4ToJavaCharsets.put(knownCharsets[count++], "ISO-8859-5"); // koi8-r p4ToJavaCharsets.put(knownCharsets[count++], "KOI8-R"); // cp1251 p4ToJavaCharsets.put(knownCharsets[count++], "windows-1251"); // utf16le p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16LE"); // utf16be p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16BE"); // utf16le-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16LE"); // utf16be-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16BE"); // utf16 p4ToJavaCharsets.put(knownCharsets[count++], "UTF-16"); // utf8-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-8"); // utf32-nobom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32"); // utf32le p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32LE"); // utf32be p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32BE"); // utf32le-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32LE"); // utf32be-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32BE"); // utf32 p4ToJavaCharsets.put(knownCharsets[count++], "UTF-32"); // utf8unchecked p4ToJavaCharsets.put(knownCharsets[count++], "UTF-8"); // utf8unchecked-bom p4ToJavaCharsets.put(knownCharsets[count++], "UTF-8"); // cp949 p4ToJavaCharsets.put(knownCharsets[count++], "x-windows-949"); // cp936 p4ToJavaCharsets.put(knownCharsets[count++], "x-mswin-936"); // cp950 p4ToJavaCharsets.put(knownCharsets[count++], "x-windows-950"); // cp850 p4ToJavaCharsets.put(knownCharsets[count++], "IBM850"); // cp858 p4ToJavaCharsets.put(knownCharsets[count++], "IBM00858"); // cp1253 p4ToJavaCharsets.put(knownCharsets[count++], "windows-1253"); // cp737 p4ToJavaCharsets.put(knownCharsets[count++], "x-IBM737"); // iso8859-7 p4ToJavaCharsets.put(knownCharsets[count++], "ISO-8859-7"); } /** * Get known P4 charsets * * @return - array of p4 charset names */ public static String[] getKnownCharsets() { return knownCharsets; } /** * Get the first matching Perforce equivalent charset name for a given Java * charset name. Multiple Perforce charsets can be mapped to a Java charset * (i.e. Perforce "utf8-bom" and "utf8unchecked" are mapped to Java "UTF-8") * * @param javaCharsetName * @return - Perforce charset name */ public static String getP4CharsetName(String javaCharsetName) { String p4CharsetName = null; if (javaCharsetName != null) { for(Map.Entry<String, String> entry : p4ToJavaCharsets.entrySet()) { if (entry.getValue() != null) { if (entry.getValue().equalsIgnoreCase(javaCharsetName)) { p4CharsetName = entry.getKey(); break; } } } } return p4CharsetName; } /** * Get Java equivalent charset name for a p4 charset name * * @param p4CharsetName * @return - Java charset name */ public static String getJavaCharsetName(String p4CharsetName) { String javaCharsetName = null; if (p4CharsetName != null) { javaCharsetName = p4ToJavaCharsets.get(p4CharsetName); } return javaCharsetName; } /** * Get Java equivalent charset name for a p4 charset index (knownCharsets) * * @param p4CharsetIndex * @return - Java charset name */ public static String getJavaCharsetName(int p4CharsetIndex) { String javaCharsetName = null; if (p4CharsetIndex >=0 && p4CharsetIndex < knownCharsets.length) { javaCharsetName = p4ToJavaCharsets.get(knownCharsets[p4CharsetIndex]); } return javaCharsetName; } /** * Get Java equivalent charset for a p4 charset index (knownCharsets) * * @param p4CharsetIndex * @return - Java charset */ public static Charset getJavaCharset(int p4CharsetIndex) { Charset javaCharset = null; if (p4CharsetIndex >=0 && p4CharsetIndex < knownCharsets.length) { javaCharset = Charset.forName(p4ToJavaCharsets.get(knownCharsets[p4CharsetIndex])); } return javaCharset; } /** * Is the P4 charset name specified supported? * * @param p4CharsetName * @return - true if supported, false otherwise */ public static boolean isSupported(String p4CharsetName) { boolean supported = false; if (p4CharsetName != null) { supported = p4ToJavaCharsets.containsKey(p4CharsetName); } return supported; } }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 19903 | stuartrowe |
Branching //guest/perforce_software/p4java/... to //guest/stuartrowe/p4java/... |
||
//guest/perforce_software/p4java/r14.1/src/main/java/com/perforce/p4java/server/PerforceCharsets.java | |||||
#1 | 12541 | Matt Attaway | Initial add of the 14.1 p4java source code |