#!/home/arne/wisp/wisp-multiline.sh
; !#

;; This file might need to be licensed permissively for inclusion in
;; an SRFI. Only change it, if you agree to this possible relicensing
;; of your contribution to this file. I will not accept changes here
;; which do not allow that.

; we need to be able to replace end-of-line characters in brackets and strings

;; TODO: Check whether I can offload the string processing to the
;; read-function. That’s a source of endless complications. Required:
;; A kind of unrolling step which appends the string-representation of
;; the read strings back into the code. I would have to process a list
;; of strings instead of one big string. Or rather, each line would be
;; a list of strings.

;; bootstrap via python3 wisp.py wisp-guile.w > 1 && guile 1 wisp-guile.w > 2 && guile 2 wisp-guile.w > 3 && diff 2 3
;;
;; -Author: Arne Babenhauserheide


;; Return #t when TEXT ends in an odd number of consecutive backslashes,
;; #f otherwise (including for the empty string).
(define (endsinunevenbackslashes text)
  ; walk backwards from the last character and count the trailing backslashes
  (let counter ((index (- (string-length text) 1))
                (count 0))
    (if (and (>= index 0)
             (char=? (string-ref text index) #\\))
        (counter (- index 1) (+ count 1))
        ; end clause: read all, or hit a char which is no \
        (odd? count))))


(define (nostringandbracketbreaks inport)
  "Replace all linebreaks inside strings and brackets with placeholders."
  ; read expression by expression until the port is exhausted; the
  ; expressions are collected in reverse and joined with a newline.
  (let loop ((expressions (list (nostringandbracketbreaksreader inport))))
    (if (eof-object? (peek-char inport))
        (string-join (reverse expressions) "\n")
        (loop (cons (nostringandbracketbreaksreader inport) expressions)))))


(define (nostringandbracketbreaksreader inport)
  "Read one wisp-expression from the inport. Ends with three consecutive linebreaks or eof."
  ; Replace end of line characters in brackets and strings
  ; FIXME: Breaks if the string is shorter than 2 chars
  ; FIXME: Breaks if the text begins with a comment.
  ;
  ; Returns the text read so far as a string, in which
  ;  - linebreaks inside strings or brackets are replaced by the
  ;    placeholders \LINE_BREAK_N (newline) and \LINE_BREAK_R (return),
  ;  - the first comment character of a real comment is followed by the
  ;    marker \REALCOMMENTHERE.
  (let* ((lastchar (read-char inport))
         (nextchar (read-char inport))
         (text (if (eof-object? lastchar) "" (string lastchar)))
         (incomment #f)
         (incommentfirstchar #f) ; first char of a comment
         (instring #f)
         (inbrackets 0)
         (incharform 0)) ; #\
    (while
        (not
         (or (eof-object? nextchar)
             (and
              (or (char=? nextchar #\newline) (char=? nextchar #\return))
              (or (char=? lastchar #\newline) (char=? lastchar #\return))
              (string-suffix? "\n\n" text)))) ; text includes lastchar
      ; incommentfirstchar is only valid for exactly one char
      (when incommentfirstchar
        (set! incommentfirstchar #f))
      ; but add incommentfirstchar if we just started the text
      (when (equal? text ";") ; initial comment
        (set! incommentfirstchar #f)
        (set! incomment #t)
        (set! text (string-append text "\\REALCOMMENTHERE")))
      ; already started char forms win over everything, so process them first.
      ; already started means: after the #\
      ; FIXME: Fails to capture #t and #f which can kill line splitting if it happens inside brackets
      ; NOTE(review): for lastchar # and nextchar \ this resets incharform
      ; to 0 before the "charform is continued" check below can see the 1,
      ; so incharform 2 looks unreachable - confirm whether that is intended.
      (when (= incharform 1)
        (when (not (and (char=? lastchar #\#)
                        (or (char=? #\f nextchar) (char=? #\t nextchar))))
          ; format #t "1: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform
          ; newline
          (set! incharform 0)))
      (when (>= incharform 2)
        (if (or (char=? nextchar #\space)
                (char=? nextchar #\newline)
                (char=? nextchar #\return))
            (begin
              ; format #t "2: set incharform 0: lastchar ~a nextchar ~a instring ~a incomment ~a incharform ~a" lastchar nextchar instring incomment incharform
              ; newline
              (set! incharform 0))
            ; else
            (set! incharform (+ incharform 1))))
      ; check if we switch to a string: last char is space, linebreak or in a string, not in a charform, not in a comment
      (when
          (and
           (char=? nextchar #\")
           (not incomment)
           (< incharform 1)
           (or
            (and
             instring ; when I’m in a string, I can get out
             (or
              (not (char=? lastchar #\\)) ; if the last char is not a backslash (escaped quote)
              ; or the last char is a backslash preceded by an uneven number of backslashes (so the backslash is actually an escaped backslash)
              (and (char=? lastchar #\\)
                   ; not : equal? #f : string-match "\\([^\\]\\)+\\(\\\\\\\\\\)*[\\]$" text ; matches [^\](\\)*\$ - non-backslash + arbitrary number of pairs of backslashes + final backslash which undoes the escaping from the lastchar (by actually escaping the lastchar)
                   ; text includes lastchar, so drop it before counting:
                   ; we need the backslashes *preceding* the last one.
                   (endsinunevenbackslashes
                    (string-drop-right text (min 1 (string-length text)))))))
            (char=? lastchar #\space) ; when the last char was a space, I can get into a string
            (char=? lastchar #\newline) ; same for newline chars
            (char=? lastchar #\return)
            (and (not instring) ; outside of strings, brackets are pseudo-whitespace, too
                 (or
                  (char=? lastchar #\()
                  (char=? lastchar #\))
                  (char=? lastchar #\[)
                  (char=? lastchar #\])
                  ; TODO: Only match for braces {} if curly infix is enabled
                  (char=? lastchar #\{)
                  (char=? lastchar #\})))))
        (set! instring (not instring)))
      ; check if we switch to a comment
      (when
          (and
           ; FIXME: this should be
           ; char=? nextchar #\;
           (equal? ";" (string nextchar))
           (not incomment)
           (not instring)
           (< incharform 2))
        (set! incomment #t)
        (set! incommentfirstchar #t)
        ; this also closes any potential charform
        (set! incharform 0))
      ; a linebreak ends the comment
      (when
          (and incomment
               (or (char=? nextchar #\return)
                   (char=? nextchar #\newline)))
        (set! incomment #f))
      ; check for the beginning of a charform
      (when
          (and
           (not instring)
           (not incomment)
           (char=? lastchar #\space)
           (char=? nextchar #\#))
        (set! incharform 1))
      ; check whether a charform is continued
      (when
          (and
           (= incharform 1)
           (char=? lastchar #\#)
           (char=? nextchar #\\))
        (set! incharform 2))
      ; check for brackets
      ; FIXME: This only fixes a single linebreak inside parens, but if a second occurs on the same line it breaks. I do not know why. Maybe something with having lastchar as linebreak.
      (when (not (or instring incomment))
        (when
            (and
             ; NOTE(review): string-suffix? takes the suffix first, so this
             ; asks whether text is a suffix of "#" - kept as is, confirm
             ; the intended argument order.
             (not (string-suffix? text "#"))
             (not (char=? #\\ lastchar))
             (not (endsinunevenbackslashes
                   (string-drop-right text (min 1 (string-length text))))))
          ; TODO: Only match for braces {} if curly infix is enabled
          ; FIXME: Catch wrong ordering of parens/brackets/braces like ({)}
          (when
              (or
               (equal? "[" (string nextchar))
               (equal? "(" (string nextchar))
               (equal? "{" (string nextchar)))
            (set! inbrackets (+ inbrackets 1)))
          (when
              (or
               (equal? "}" (string nextchar))
               (equal? ")" (string nextchar))
               (equal? "]" (string nextchar)))
            (set! inbrackets (- inbrackets 1)))))
      (if (or instring (> inbrackets 0))
          (if (char=? nextchar #\newline)
              ; we have to actually construct the escape
              ; sequence here to be able to parse ourselves.
              (set! text (string-append text (string-append "\\LINE_" "BREAK_N")))
              (if (char=? nextchar #\return)
                  (set! text (string-append text (string-append "\\LINE_" "BREAK_R")))
                  ; else
                  (set! text (string-append text (string nextchar)))))
          ; mark the start of a comment, so we do not have to
          ; repeat the string matching in later code. We include
          ; the comment character!
          ; not (instring or inbrackets) = neither instring nor inbrackets
          (if incommentfirstchar
              (set! text (string-append text (string nextchar) "\\REALCOMMENTHERE"))
              ; when not in brackets or string or starting a
              ; comment: just append the char
              (set! text (string-append text (string nextchar)))))
      (set! lastchar nextchar)
      (set! nextchar (read-char inport)))
    ; return the text
    text))


;; Read INPORT to the end and return its content as a list of lines.
;; Every return or newline character ends the current line; the text
;; after the last linebreak (possibly "") is always the final element.
(define (splitlines inport)
  (let loop ((lines '())   ; finished lines, newest first
             (chars '())   ; chars of the current line, newest first
             (nextchar (read-char inport)))
    (cond
     ((eof-object? nextchar)
      (reverse (cons (list->string (reverse chars)) lines)))
     ((or (char=? nextchar #\return)
          (char=? nextchar #\newline))
      (loop (cons (list->string (reverse chars)) lines)
            '()
            (read-char inport)))
     (else
      (loop lines (cons nextchar chars) (read-char inport))))))


; skip the leading indentation
;; Consumes leading underbars followed by spaces from INPORT and returns
;; how many characters were skipped. The first character which is not
;; part of the indentation is put back into the port.
(define (skipindent inport)
  (let skipper ((inunderbars #t)
                (indent 0)
                (nextchar (read-char inport)))
    (cond
     ; when the file ends, do not do anything else
     ((eof-object? nextchar)
      indent)
     ; skip underbars
     (inunderbars
      (if (char=? nextchar #\_) ; still in underbars?
          (skipper #t (+ indent 1) (read-char inport))
          ; else, reevaluate without inunderbars
          (skipper #f indent nextchar)))
     ; else: skip remaining spaces
     ((char=? nextchar #\space)
      (skipper #f (+ indent 1) (read-char inport)))
     (else
      (unread-char nextchar inport)
      indent))))


; Now we have to split a single line into indentation, content and comment.
;; Reads one line from INPORT and returns (list indent content comment):
;; indent is the number returned by skipindent, comment is the text
;; after the ;\REALCOMMENTHERE marker (marker excluded), and content is
;; everything in between. A ";" which is not followed by the complete
;; marker stays part of the content.
(define (splitindent inport)
  (let ((indent (skipindent inport)))
    (let* ((nextchar (read-char inport))
           (incomment #f) ; it does not begin in a comment
           (commentstart #f)
           (commentstartidentifier "\\REALCOMMENTHERE")
           (commentstartidentifierlength (string-length commentstartidentifier))
           (commentidentifierindex 0)
           (content "")
           (comment ""))
      ; (newline)
      (while (not (eof-object? nextchar))
        ; check whether we leave the content
        ; FIXME: (wisp.py) the reader cuts the ; here, when I write it as this:
        ; when : and ( not incomment ) : char=? nextchar #\;
        ; FIXME: THIS mistreats #\; as comment! (shown 4 lines after this comment…)
        (when
            (and
             (not incomment)
             ; FIXME: this should be but would break
             ; char=? nextchar #\;
             (equal? ";" (string nextchar))
             (not (string-suffix? (string #\# #\\) content)))
          (set! commentstart #t)
          (set! comment (string-append comment (string nextchar)))
          (set! nextchar (read-char inport))
          (continue))
        ; check whether we stay in the commentcheck
        (when
            (and commentstart
                 (char=? nextchar
                         (string-ref commentstartidentifier commentidentifierindex)))
          (set! commentidentifierindex (+ commentidentifierindex 1))
          (set! comment (string-append comment (string nextchar)))
          (when (= commentidentifierindex commentstartidentifierlength)
            (set! commentstart #f)
            (set! incomment #t)
            ; reset used variables
            (set! commentidentifierindex 0)
            (set! comment ""))
          (set! nextchar (read-char inport))
          (continue))
        ; if we cannot complete the commentcheck, we did not start a real comment. Append it to the content
        (when
            (and commentstart
                 (not (char=? nextchar
                              (string-ref commentstartidentifier commentidentifierindex))))
          (set! commentstart #f)
          (set! content (string-append content comment (string nextchar)))
          (set! comment "")
          (set! commentidentifierindex 0)
          (set! nextchar (read-char inport))
          (continue))
        ; if we are in the comment, just append to the comment
        (when incomment
          (set! comment (string-append comment (string nextchar)))
          (set! nextchar (read-char inport))
          (continue))
        ; if nothing else is true, we are in the content
        (set! content (string-append content (string nextchar)))
        (set! nextchar (read-char inport)))
      ; the line ended in the middle of the commentcheck: it is content
      (when commentstart
        (set! content (string-append content comment))
        (set! comment ""))
      ; return the indentation, the content and the comment
      (list indent content comment))))


; Now use the function to split a list of lines
;; Returns a list with one (indent content comment) entry per line.
(define (linestoindented lines)
  (map (lambda (line)
         (call-with-input-string line splitindent))
       lines))


;; Return the complete content of the file FILENAME as one string.
;; The port is closed once the file has been read.
(define (read-whole-file filename)
  (call-with-input-file filename
    (lambda (origfile)
      (let reader ((chars '()) ; chars read so far, newest first
                   (nextchar (read-char origfile)))
        (if (eof-object? nextchar)
            (list->string (reverse chars))
            (reader (cons nextchar chars) (read-char origfile)))))))


;; Run the processing steps implemented so far on TEXT. The computed
;; lines are not used yet; the function always returns #t.
(define (wisp2lisp text)
  (let* ((nobreaks (call-with-input-string text nostringandbracketbreaks))
         (textlines (call-with-input-string nobreaks splitlines))
         (lines (linestoindented textlines)))
    #t))


; first step: Be able to mirror a file to stdout
(if (< 1 (length (command-line)))
    (let* ((filename (list-ref (command-line) 1))
           (text (read-whole-file filename))
           (lisp (wisp2lisp text)))
      #t))