spacepaste

  1.  
  2. #!/usr/bin/python
  3. # -*- coding: utf-8 -*-
  4. """A small filter that converts between various byte-encodings of Unicode text.
  5. Default behavior translates Unicode-escaped text ("\\u2603") to UTF-8 ("☃") to
  6. better read Python repr() output.
  7. """
  8. from __future__ import unicode_literals
  9. import io
  10. import optparse
  11. import sys
  12. def getopt():
  13. parser = optparse.OptionParser(__doc__)
  14. parser.add_option(
  15. '-i',
  16. '--input',
  17. dest='input_encoding',
  18. default='utf-8',
  19. help='Input encoding. [default: %default]',
  20. )
  21. parser.add_option(
  22. '-o',
  23. '--output',
  24. dest='output_encoding',
  25. default='utf-8',
  26. help='Output encoding. [default: %default]',
  27. )
  28. options, args = parser.parse_args()
  29. if not args:
  30. args = ['/dev/stdin']
  31. return args, options
  32. def show_unicode(infiles, input_encoding='utf-8', output_encoding='utf-8'):
  33. output = io.open(sys.stdout.fileno(), 'w', encoding=output_encoding)
  34. for arg in infiles:
  35. input = io.open(arg, 'r', encoding=input_encoding)
  36. for line in input:
  37. line = line.encode('ascii', 'backslashreplace')
  38. line = line.decode('unicode_escape')
  39. output.write(line)
  40. output.flush()
  41. def main():
  42. args, options = getopt()
  43. return show_unicode(args, **vars(options))
  44. if __name__ == '__main__':
  45. exit(main())
  46.