spacepaste

  1.  
  2. #!/usr/bin/env bash
  3. # -*- mode: scheme -*-
  4. exec guile $0
  5. # !#
  6. ;;; wordcount.scm --- meeting the challenge with Guile, fast version
  7. ;; Copyright (C) 2017
  8. ;; Arne Babenhauserheide <arne_bab@web.de> and Linus Björnstam
  9. ;; Author: Arne Babenhauserheide <arne_bab@web.de> and Linus Björnstam
  10. ;; This library is free software; you can redistribute it and/or
  11. ;; modify it under the terms of the GNU Lesser General Public
  12. ;; License as published by the Free Software Foundation; either
  13. ;; version 3 of the License, or (at your option) any later version.
  14. ;; This library is distributed in the hope that it will be useful,
  15. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;; Lesser General Public License for more details.
  18. ;; You should have received a copy of the GNU Lesser General Public
  19. ;; License along with this library. If not, see
  20. ;; <http://www.gnu.org/licenses/>.
  21. ;;; Commentary:
  22. ;; The wordcount challenge pits programming languages against each
  23. ;; other in a battle of counting words in unicode text.
  24. ;;
  25. ;; See https://github.com/juditacs/wordcount
  26. ;;
  27. ;; wordcount_reference.scm holds an alternative version written for
  28. ;; clarity but not speed.
  29. ;;; Code:
  30. (import (ice-9 rdelim)
  31. (ice-9 hash-table))
  32. (define (count-or-unicode>? a b)
  33. (let ((Na (car a))(Nb (car b)))
  34. (or (> Na Nb)
  35. (and (= Na Nb)
  36. (string<? (cdr a) (cdr b))))))
  37. (define (skip-whitespace port)
  38. (let ([ch (peek-char port)])
  39. (or (eof-object? ch) (not (char-whitespace? ch))
  40. (begin (read-char port)
  41. (skip-whitespace port)))))
  42. (define (count-words port)
  43. (define buf '())
  44. (define (read-word port)
  45. (skip-whitespace port)
  46. (cond
  47. ((null? buf)
  48. (let ((line (read-delimited "\t" port)))
  49. (cond
  50. ((eof-object? line) line)
  51. (else (set! buf (apply append (map (λ (x) (string-split x #\space))
  52. (string-split line #\newline))))
  53. (read-word port)))))
  54. (else
  55. (let ((word (car buf)))
  56. (set! buf (cdr buf))
  57. word))))
  58. (define count (make-hash-table 100000))
  59. (define (add-word next)
  60. (hash-set! count next
  61. (1+ (hash-ref count next 0))))
  62. (let loop ((next (read-word port)))
  63. (unless (eof-object? next)
  64. (unless (string-null? next)
  65. (add-word next))
  66. (loop (read-word port))))
  67. (sort! ;; destructive sort to save memory
  68. (hash-map->list (lambda (key val) (cons val key)) count)
  69. count-or-unicode>?))
  70. (for-each
  71. (lambda (x) (format #t "~a\t~a\n" (cdr x) (car x)))
  72. (count-words (current-input-port)))
  73.