#! /usr/bin/env python3.3
"""Functions for dealing with character encoding."""
from p4gf_l10n import NTR
def _encoding_list():
    """Return the candidate character encodings, most preferred first.

    The list is consulted when bytes of unknown encoding must be turned
    into text: each entry is tried in order until one succeeds.
    """
    # NOTE (per Zig): 'latin_1' can decode ANY byte array. It is a
    # straight pass-through that maps every 8-bit byte \xXX to the
    # Unicode character U+00XX, so it never fails.
    #
    # Consequence: anything listed after 'latin_1' ('shift_jis' here)
    # is never attempted, and decode() never reaches the point where it
    # gives up and re-raises the original utf8 decoding error.
    preferred_order = ['utf8', 'latin_1', 'shift_jis']
    return NTR(preferred_order)
def decode(bites):
    """Attempt to decode bytes using one of several code pages.

    Each encoding returned by _encoding_list() is tried in order, and
    the result of the first one that succeeds is returned.

    Args:
        bites: the bytes object to decode.

    Returns:
        The decoded str.

    Raises:
        UnicodeDecodeError: if no candidate encoding can decode the
            input. The error raised is the one produced by the first
            (most preferred) encoding.
    """
    # Fetch the list once so the loop and the fallback agree on it.
    encodings = _encoding_list()
    for encoding in encodings:
        try:
            return bites.decode(encoding)
        except UnicodeDecodeError:
            # This code page rejected the input; try the next one.
            continue

    # Give up: re-create and raise the first error by decoding again
    # with the most preferred encoding. The original code subscripted
    # the function object itself (_encoding_list[0]), which raised
    # TypeError instead of the intended UnicodeDecodeError.
    return bites.decode(encodings[0])