-
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- """A small filter that converts between various byte-encodings of Unicode text.
-
- By default it translates Unicode-escaped text ("\\u2603") to UTF-8 ("☃") to
- make Python repr() output easier to read.
- """
- from __future__ import unicode_literals
-
- import io
- import optparse
- import sys
-
def getopt(argv=None):
    """Parse the command line into input paths and encoding options.

    Args:
        argv: Argument list to parse, without the program name.  When
            ``None`` (the default) ``sys.argv[1:]`` is parsed, so existing
            zero-argument callers behave exactly as before.

    Returns:
        An ``(args, options)`` tuple.  ``args`` is the list of positional
        arguments (input paths), or ``['/dev/stdin']`` when none were given.
        ``options`` is the ``optparse`` values object carrying the
        ``input_encoding`` and ``output_encoding`` attributes.
    """
    # The module docstring doubles as the usage text shown by --help.
    parser = optparse.OptionParser(__doc__)
    parser.add_option(
        '-i',
        '--input',
        dest='input_encoding',
        default='utf-8',
        help='Input encoding. [default: %default]',
    )
    parser.add_option(
        '-o',
        '--output',
        dest='output_encoding',
        default='utf-8',
        help='Output encoding. [default: %default]',
    )
    # parse_args(None) falls back to sys.argv[1:].
    options, args = parser.parse_args(argv)

    if not args:
        # No files named: act as a filter on standard input.
        args = ['/dev/stdin']

    return args, options
-
def show_unicode(infiles, input_encoding='utf-8', output_encoding='utf-8'):
    """Copy each input file to standard output, expanding backslash escapes.

    Each line is decoded from ``input_encoding``, every backslash escape
    sequence in it is replaced by the character it denotes, and the result
    is written to the process's standard output encoded as
    ``output_encoding``.

    Args:
        infiles: Iterable of file paths to read, processed in order.
        input_encoding: Codec used to decode the input files.
        output_encoding: Codec used to encode the text written to stdout.

    Raises:
        UnicodeDecodeError: If an input is not valid ``input_encoding`` or
            contains a malformed escape sequence (such as a truncated
            four-digit Unicode escape).
    """
    # closefd=False: this wrapper only borrows the stdout descriptor, so
    # closing (or garbage-collecting) it must not close the real stdout.
    with io.open(
        sys.stdout.fileno(), 'w', encoding=output_encoding, closefd=False
    ) as output:
        for path in infiles:
            # Close every input as soon as it has been consumed.
            with io.open(path, 'r', encoding=input_encoding) as source:
                for line in source:
                    # Force the text down to pure ASCII (non-ASCII characters
                    # become backslash escapes), then let the unicode_escape
                    # codec expand every escape, old and new alike.
                    escaped = line.encode('ascii', 'backslashreplace')
                    output.write(escaped.decode('unicode_escape'))
                    # Flush per line so output appears promptly when the
                    # script is used as a filter in a live pipeline.
                    output.flush()
-
def main():
    """Run the filter using the options given on the command line.

    Returns:
        Whatever ``show_unicode`` returns; the caller uses it as the
        process exit status.
    """
    infiles, opts = getopt()
    return show_unicode(
        infiles,
        input_encoding=opts.input_encoding,
        output_encoding=opts.output_encoding,
    )
-
if __name__ == '__main__':
    # Use sys.exit rather than the bare exit(): the latter is injected by
    # the site module for interactive use and is missing under `python -S`
    # or in frozen/embedded interpreters.
    sys.exit(main())
-