#!/usr/bin/python
# -*- coding: utf-8 -*-
"""A small filter that converts between various byte-encodings of Unicode text.

Default behavior translates Unicode-escaped text ("\\u2603") to UTF-8 ("☃")
to better read Python repr() output.
"""
from __future__ import unicode_literals

import io
import optparse
import sys

# Pseudo-path used when no input files are named on the command line.
STDIN_PATH = '/dev/stdin'


def getopt():
    """Parse the command line.

    Returns:
        A ``(args, options)`` pair.  ``args`` is the list of input paths
        (``[STDIN_PATH]`` when none were given) and ``options`` carries the
        ``input_encoding`` and ``output_encoding`` attributes.
    """
    # The module docstring doubles as the usage text.
    parser = optparse.OptionParser(__doc__)
    parser.add_option(
        '-i', '--input',
        dest='input_encoding',
        default='utf-8',
        help='Input encoding. [default: %default]',
    )
    parser.add_option(
        '-o', '--output',
        dest='output_encoding',
        default='utf-8',
        help='Output encoding. [default: %default]',
    )
    options, args = parser.parse_args()
    if not args:
        args = [STDIN_PATH]
    return args, options


def _open_input(path, encoding):
    """Open *path* for text reading, mapping STDIN_PATH to the stdin descriptor."""
    if path == STDIN_PATH:
        # Reuse the already-open stdin descriptor so the default also works on
        # platforms without /dev/stdin; closefd=False keeps stdin open for
        # the rest of the process.
        return io.open(
            sys.stdin.fileno(), 'r', encoding=encoding, closefd=False)
    return io.open(path, 'r', encoding=encoding)


def show_unicode(infiles, input_encoding='utf-8', output_encoding='utf-8'):
    """Copy each input file to stdout, expanding backslash escapes.

    Every line is first encoded to ASCII with ``backslashreplace`` (so
    non-ASCII characters become escapes) and then decoded with
    ``unicode_escape``, which turns every Python-style backslash escape in the
    line (not only ``\\uXXXX``) into the character it denotes.

    Args:
        infiles: Iterable of paths to read; ``STDIN_PATH`` reads stdin.
        input_encoding: Encoding used to decode the input files.
        output_encoding: Encoding used for the bytes written to stdout.
    """
    # Push out anything already buffered on sys.stdout so it is not emitted
    # after the bytes written straight to the descriptor below.
    sys.stdout.flush()
    # closefd=False: closing this wrapper must not close the process's stdout.
    with io.open(sys.stdout.fileno(), 'w', encoding=output_encoding,
                 closefd=False) as output:
        for path in infiles:
            with _open_input(path, input_encoding) as source:
                for line in source:
                    escaped = line.encode('ascii', 'backslashreplace')
                    output.write(escaped.decode('unicode_escape'))


def main():
    """Command-line entry point: parse options and run the filter."""
    args, options = getopt()
    return show_unicode(args, **vars(options))


if __name__ == '__main__':
    # sys.exit rather than the site-provided exit(), which is absent under -S.
    sys.exit(main())