Megatest

Check-in [4ea543015e]
Login
Overview
Comment:Pulled csv-xml in to facilitate migrating to ck5
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6584-ck5
Files: files | file ages | folders
SHA1: 4ea543015e112f33d7ef93c304bed12180914bc4
User & Date: matt on 2021-04-03 16:20:50
Other Links: branch diff | manifest | tags
Context
2021-04-03
16:50
Adding call-with-environment-variables for the transition check-in: 8856c49357 user: matt tags: v1.6584-ck5
16:20
Pulled csv-xml in to facilitate migrating to ck5 check-in: 4ea543015e user: matt tags: v1.6584-ck5
06:28
Minor cleanup check-in: 867b8b4e9e user: matt tags: v1.6584-ck5
Changes

Added csv-xml/csv-out.impl version [a1397a6ff6].











































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
;;;; cvs-out.impl  -*- Hen -*-
;;;; Kon Lovett, Jun '17

;;;; *** included source file ***

;;Issues
;;
;;- missing explicit types for exports; too much '*' type

;;

(define-constant CRLF-STR "\r\n")
(define-constant LF-STR "\n")
(define-constant CR-STR "\r") ;old MacOS

(define *system-newline*
  (cond-expand
    (windows
      CRLF-STR )
    (unix
      LF-STR )
    (else
      LF-STR ) ) )

(define-constant +newline-char-default+ #t)               ;#t - <system> | #\n | ...
(define-constant +separator-char-default+ #\,)
(define-constant +quote-char-default+ #\")                ;#f | #\" | ...
(define-constant +comment-char-default+ #\#)              ;#f | #\# | ...
(define-constant +quote-doubling-escapes?-default+ #t)
(define-constant +quote-controls?-default+ #t)
(define-constant +always-quote?-default+ #t)

#|
(define-constant +sxml-top-symbol+ '|*TOP*|)
(define-constant +sxml-row-element-default+ 'row)
(define-constant +sxml-col-elements-limit-default+ 32) ; arbitrary (see csv.ss)
|#

;;

;very loose : newline-char | separator-char | quote-char
;see "csv-xml.scm"
(define csv-writer-spec? alist?)
(define-check+error-type csv-writer-spec)

(define csv-writer? procedure?)
(define-check+error-type csv-writer)

;;

(define *default-writer-spec* (writer-spec-with-defaults '()))

(define (list->csv ls #!optional (writer-or-out (current-output-port)))
  (let (
      (writer
        (cond
          ((csv-writer? writer-or-out)
            writer-or-out )
          ((output-port? writer-or-out)
            (make-csv-line-writer 'list->csv writer-or-out *default-writer-spec*) )
          (else
            (error 'list->csv "invalid csv-writer or output-port" writer-or-out) ) ) ) )
    (for-each writer ls) ) )

#|
;;

(define (list->sxml ls
          #!optional
          (row-element (sxml-row-element-default))
          (column-elements (sxml-col-elements-default))
          (writer-spec *default-writer-spec*))
  (append!
    `(,(sxml-top-symbol))
    (map (cut list->sxml-element <> row-element column-elements writer-spec) ls)) )
|#

;;

(define (writer-spec
          #!key
          (newline-char +newline-char-default+)
          (separator-char +separator-char-default+)
          (quote-char +quote-char-default+)
          (comment-char +comment-char-default+)
          (quote-doubling-escapes? +quote-doubling-escapes?-default+)
          (quote-controls? +quote-controls?-default+)
          (always-quote? +always-quote?-default+))
  ;FIXME checking the input types
  `((newline-char . ,newline-char)
    (separator-char . ,separator-char)
    (quote-char . ,quote-char)
    (comment-char . ,comment-char)
    (quote-doubling-escapes? . ,quote-doubling-escapes?)
    (quote-controls? . ,quote-controls?)
    (always-quote? . ,always-quote?)) )

;;

(define (make-csv-writer out-or-str #!optional (writer-spec '()))
  (let ((make-spec-csv-writer (make-csv-writer-maker writer-spec)))
    (make-spec-csv-writer out-or-str) ) )

(define (make-csv-writer-maker #!optional (writer-spec '()))
  (let ((writer-spec
          (writer-spec-with-defaults
            (check-csv-writer-spec 'make-csv-writer-maker writer-spec)) ) )
    (lambda (out-or-str)
      (let (
          (out
            (cond
              ((string? out-or-str)
                (open-output-file out-or-str) )
              ((output-port? out-or-str)
                out-or-str )
              (else
                (error
                  'csv-writer-maker
                  "invalid output-port or string" out-or-str) ) ) ) )
        (make-csv-line-writer 'csv-writer-maker out writer-spec) ) ) ) )

;;

(define (make-csv-line-writer loc out writer-spec)
  (let (
    (writer-spec
      (check-csv-writer-spec loc writer-spec) )
    (newline-obj
      (select-newline-object loc (alist-ref 'newline-char writer-spec eq?)) )
    (separator-char
      (alist-ref 'separator-char writer-spec eq?) )
    (quote-char
      (alist-ref 'quote-char writer-spec eq?) )
    (comment-char
      (alist-ref 'comment-char writer-spec eq?) )
    (quote-doubling-escapes?
      (alist-ref 'quote-doubling-escapes? writer-spec eq?) )
    (quote-controls?
      (alist-ref 'quote-controls? writer-spec eq?) )
    (always-quote?
      (alist-ref 'always-quote? writer-spec eq?) ) )
    ;
    (let* (
      (quote-char-str (unicode-char->string quote-char) )
      (quote-char-str-2 (string-append quote-char-str quote-char-str)) )
      ;
      (define (csv-line-object->string obj)
        ;
        (define (quote-doubling? str)
          (and quote-doubling-escapes? (string-index str quote-char)) )
        ;
        (define (quoting? str)
          (or
            always-quote?
            (quote-doubling? str)
            (and separator-char (string-index str separator-char))
            (and quote-controls? (string-index str char-set:iso-control))) )
        ;
        (type-case obj
          ((char)
            (csv-line-object->string (unicode-char->string obj)) )
          ((symbol)
            (csv-line-object->string (symbol->string obj)) )
          ((string)
            (if (and quote-char (quoting? obj))
              (let (
                (str
                  (if (quote-doubling? obj)
                    (string-translate* obj `((,quote-char-str . ,quote-char-str-2)))
                    obj ) ) )
                ;
                (conc quote-char str quote-char) )
              obj ) )
          (number
            (csv-line-object->string (number->string obj)) )
          (else
            (csv-line-object->string (->string obj)) ) ) )
      ;
      (lambda (obj)
        (let (
          ;build row to output as a string with a line-ending sequence
          (lin
            ;comment desired?
            (if (list? obj)
              ;row data
              (let ((qstrs (map csv-line-object->string (check-list loc obj))))
                (apply
                  conc
                  (append!
                    (intersperse qstrs separator-char)
                    `(,newline-obj))) )
              ;are we supposed to do comments?
              (if comment-char
                (conc comment-char obj newline-obj)
                obj
                #;
                (begin
                  (warning loc "comments not active" obj writer-spec)
                  "" ) ) ) ) )
            ;
            (display lin out) ) ) ) ) )

;;

(define (select-newline-object loc spec)
  (case spec
    ((cr)
      #\return )
    ((lf)
      #\newline )
    ((crlf)
      CRLF-STR )
    (else
      *system-newline* ) ) )

;;

(define (writer-spec-with-defaults writer-spec)
  `((newline-char . ,(alist-ref 'newline-char writer-spec eq? +newline-char-default+))
    (separator-char . ,(alist-ref 'separator-char writer-spec eq? +separator-char-default+))
    (quote-char . ,(alist-ref 'quote-char writer-spec eq? +quote-char-default+))
    (comment-char . ,(alist-ref 'comment-char writer-spec eq? +comment-char-default+))
    (quote-doubling-escapes? . ,(alist-ref 'quote-doubling-escapes? writer-spec eq? +quote-doubling-escapes?-default+))
    (quote-controls? . ,(alist-ref 'quote-controls? writer-spec eq? +quote-controls?-default+))
    (always-quote? . ,(alist-ref 'always-quote? writer-spec eq? +always-quote?-default+))) )

#|
;;

(define (list->sxml-element ls row-element col-elements writer-spec)
  (if (list? ls)
    ;row data
    `(,row-element ,@(map list col-elements (map ->string ls)))
    ;are we supposed to do comments?
    (if (alist-ref 'comment-char writer-spec eq?)
      `(*COMMENT* ,(->string ls))
      ls ) ) )

(define (make-sxml-col-symbol n)
  (string->symbol (string-append "col-" (number->string n))) )

(define +sxml-col-elements-default+
  (map make-sxml-col-symbol (sxml-col-iota)) )

(define (sxml-top-symbol)
  +sxml-top-symbol+ )

(define (sxml-row-element-default)
  +sxml-row-element-default+ )

(define (sxml-col-elements-default)
  +sxml-col-elements-default+ )

(define (sxml-col-iota)
  (iota +sxml-col-elements-limit-default+) )
#;
(define (sxml-col-iota)
  (do ((i 0 add1)
       (ls '() (cons (make-sxml-col-symbol i) ls)) )
      ((= i +sxml-col-elements-limit-default+) ls) ) )
|#

Added csv-xml/csv-xml.meta version [fd2d3bfc5b].







































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
;;;; csv-xml.meta  -*- Hen -*-

((egg "csv-xml.egg")
 (date "2011-07-02")
 (category parsing)
 (author "Neil van Dyke")
 (license "LGPL 3")
 (doc-from-wiki)
 (synopsis "Parsing comma-separated values")
 (depends
  (setup-helper "1.5.2")
  (check-errors "2.0.2")
  (moremacros "1.4.2")
  (string-utils "1.5.5"))
 (test-depends testeez test)
 (files
  "csv-xml.meta" "csv-xml.setup"
  "csv-xml.scm" "csv-out.impl" "csv.ss"
  "test/run.scm" "test/test-csv.ss") )

Added csv-xml/csv-xml.scm version [010c5c25e1].

























































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
;;;; csv-xml.scm  -*- Hen -*-
;;;; Kon Lovett, Jun '17
;;;; Kon Lovett, ??? '??

(module csv-xml

(;export
  ;
  reader-spec
  ;
  make-csv-reader
  make-csv-reader-maker
  ;
  csv->list
  csv->sxml
  csv-for-each
  csv-map
  ;
  csv-reader? check-csv-reader error-csv-reader
  csv-reader-spec? check-csv-reader-spec error-csv-reader-spec
  ;
  writer-spec
  ;
  make-csv-writer-maker
  make-csv-writer
  ;
  list->csv
  #;list->sxml
  ;
  csv-writer? check-csv-writer error-csv-writer
  csv-writer-spec? check-csv-writer-spec error-csv-writer-spec)

(import scheme)

#;(import (except chicken provide))
(import chicken)

;;;

;Need to process `#lang' as well. So just "commented out" the "offending"
;sections in the source.
#;(define-syntax provide (syntax-rules () ((_ ?x0 ...) (begin))))
(define null '())

(include "csv.ss")

;;;

(import (only data-structures conc intersperse ->string alist-ref string-translate*))
(require-library data-structures)

#;(import (only list-utils alist?))
(import (only (srfi 1) every iota append! map))
(require-library (srfi 1))

(import (only (srfi 13) string-index))
(require-library (srfi 13))

(import (only (srfi 14) char-set:iso-control))
(require-library (srfi 14))

(import (only type-checks define-check+error-type check-string check-list))
(require-library type-checks)

(import (only unicode-utils unicode-char->string))
(require-library unicode-utils)

(require-extension moremacros)

;(from list-utils egg)
(define (alist? obj)
  (if (pair? obj)
    (every pair? obj)
    (null? obj) ) )

;very loose ...
(define csv-reader-spec? alist?)
(define-check+error-type csv-reader-spec)

(define csv-reader? procedure?)
(define-check+error-type csv-reader)

(define (reader-spec
          #!key
          (newline-type 'lax)
          (separator-chars '(#\,))
          (quote-char #\")
          (quote-doubling-escapes? #t)
          (comment-chars '())
          (whitespace-chars '(#\space))
          (strip-leading-whitespace? #f)
          (strip-trailing-whitespace? #f)
          (newlines-in-quotes? #t))
  `((newline-type . ,newline-type)
    (separator-chars . ,separator-chars)
    (quote-char . ,quote-char)
    (quote-doubling-escapes? . ,quote-doubling-escapes?)
    (comment-chars . ,comment-chars)
    (whitespace-chars . ,whitespace-chars)
    (strip-leading-whitespace? . ,strip-leading-whitespace?)
    (strip-trailing-whitespace? . ,strip-trailing-whitespace?)
    (newlines-in-quotes? . ,newlines-in-quotes?)) )

;;;

(include "csv-out.impl")

) ;csv-xml

Added csv-xml/csv-xml.setup version [510ab3ecb8].

























>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
;;;; csv-xml.setup  -*- Hen -*-

(use setup-helper-mod)

(verify-extension-name "csv-xml")

(setup-shared+static-extension-module (extension-name) (extension-version "0.12.1")
  #:types? #t
  #:inline? #t
  #:compile-options '(
    -optimize-level 3 -debug-level 2
    -no-procedure-checks-for-toplevel-bindings -no-procedure-checks-for-usual-bindings))

Added csv-xml/csv.ss version [817d757d1f].



















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
;;; @Package     csv
;;; @Subtitle    Comma-Separated Value (CSV) Utilities in Scheme
;;; @HomePage    http://www.neilvandyke.org/csv-scheme/
;;; @Author      Neil Van Dyke
;;; @Version     0.10
;;; @Date        2010-04-13
;;; @PLaneT      neil/csv:1:6

;; $Id: csv.ss,v 1.199 2010/04/13 17:56:20 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2004--2009 Neil Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

;#lang scheme/base

;;; @section Introduction

;;; The @b{csv} Scheme library provides utilities for reading various kinds of
;;; what are commonly known as ``comma-separated value'' (CSV) files.  Since
;;; there is no standard CSV format, this library permits CSV readers to be
;;; constructed from a specification of the peculiarities of a given variant.
;;; A default reader handles the majority of formats.
;;;
;;; One of the main uses of this library is to import data from old crusty
;;; legacy applications into Scheme for data conversion and other processing.
;;; To that end, this library includes various conveniences for iterating over
;;; parsed CSV rows, and for converting CSV input to the
;;; @uref{http://pobox.com/~oleg/ftp/Scheme/SXML.html, SXML 3.0} Scheme XML
;;; format.
;;;
;;; This library requires R5RS, SRFI-6, SRFI-23, and an @code{integer->char}
;;; procedure that accepts ASCII values.
;;;
;;; Other implementations of some kind of CSV reading for Scheme include
;;; Gauche's @code{text.csv} module, and Scsh's @code{record-reader} and
;;; related procedures.  This library intends to be portable and more
;;; comprehensive.

;; TODO: Briefly introduce terms "row", "column", and "field".

(define-syntax %csv:error
  (syntax-rules () ((_ p m o)
                    (error (string-append p " : " m) o)
                    ;; Bigloo: (error p m o)
                    )))

(define-syntax %csv:type-error
  (syntax-rules ()
    ((_ proc-str expected-str got-value)
     (%csv:error proc-str
                 (string-append "expected " expected-str ", received:")
                 got-value))))

(define %csv:a2c integer->char)

(define %csv:cr (%csv:a2c 13))
(define %csv:lf (%csv:a2c 10))

(define-syntax %csv:gosc
  (syntax-rules ()
    ((_ os-stx)
     (let* ((os  os-stx)
            (str (get-output-string os)))
       (close-output-port os)
       str))))

(define (%csv:in-arg proc-name in)
  (cond ((input-port? in) in)
        ((string?     in) (open-input-string in))
        (else (%csv:type-error proc-name "input port or string" in))))

(define (%csv:reader-or-in-arg proc-name reader-or-in)
  (cond ((procedure?  reader-or-in) reader-or-in)
        ((input-port? reader-or-in) (make-csv-reader reader-or-in))
        ((string?     reader-or-in) (make-csv-reader (open-input-string
                                                      reader-or-in)))
        (else (%csv:type-error proc-name
                               "csv reader or input port or string"
                               reader-or-in))))

;;; @section Reader Specs

;;; CSV readers are constructed using @dfn{reader specs}, which are sets of
;;; attribute-value pairs, represented in Scheme as association lists keyed on
;;; symbols.  Each attribute has a default value if not specified otherwise.
;;; The attributes are:

;;; @table @code
;;;
;;; @item newline-type
;;; Symbol representing the newline, or record-terminator, convention.  The
;;; convention can be a fixed character sequence (@code{lf}, @code{crlf}, or
;;; @code{cr}, corresponding to combinations of line-feed and carriage-return),
;;; any string of one or more line-feed and carriage-return characters
;;; (@code{lax}), or adaptive (@code{adapt}).  @code{adapt} attempts to detect
;;; the newline convention at the start of the input and assume that convention
;;; for the remainder of the input.  Default: @code{lax}
;;;
;;; @item separator-chars
;;; Non-null list of characters that serve as field separators.  Normally, this
;;; will be a list of one character.  Default: @code{(#\,)} (list of the comma
;;; character)
;;;
;;; @item quote-char
;;; Character that should be treated as the quoted field delimiter character,
;;; or @code{#f} if fields cannot be quoted.  Note that there can be only one
;;; quote character.  Default: @code{#\"} (double-quote)
;;;
;;; @item quote-doubling-escapes?
;;; Boolean for whether or not a sequence of two @code{quote-char} quote
;;; characters within a quoted field constitute an escape sequence for
;;; including a single @code{quote-char} within the string.  Default: @code{#t}
;;;
;;; @item comment-chars
;;; List of characters, possibly null, which comment out the entire line of
;;; input when they appear as the first character in a line.  Default:
;;; @code{()} (null list)
;;;
;;; @item whitespace-chars
;;; List of characters, possibly null, that are considered @dfn{whitespace}
;;; constituents for purposes of the @code{strip-leading-whitespace?} and
;;; @code{strip-trailing-whitespace?} attributes described below.
;;; Default: @code{(#\space)} (list of the space character)
;;;
;;; @item strip-leading-whitespace?
;;; Boolean for whether or not leading whitespace in fields should be
;;; stripped.  Note that whitespace within a quoted field is never stripped.
;;; Default: @code{#f}
;;;
;;; @item strip-trailing-whitespace?
;;; Boolean for whether or not trailing whitespace in fields should be
;;; stripped.  Note that whitespace within a quoted field is never stripped.
;;; Default: @code{#f}
;;;
;;; @item newlines-in-quotes?
;;; Boolean for whether or not newline sequences are permitted within quoted
;;; fields.  If true, then the newline characters are included as part of the
;;; field value; if false, then the newline sequence is treated as a premature
;;; record termination.  Default: @code{#t}
;;;
;;; @end table

;; TODO: Do not expose this procedure for now.  We expect it to go away and be
;; replaced with two other procedures.
;;
;; @defproc %csv:csv-spec-derive orig-spec changes
;;
;; Yields a new CSV spec that is derived from @var{orig-spec} by applying spec
;; @var{changes} as attribute substitions and additions to the original.  For
;; example, given an original CSV reader spec:
;;
;; @lisp
;; (define my-first-csv-spec
;;   '((newline-type            . lax)
;;     (separator-chars         . (#\,))
;;     (quote-char              . #\")
;;     (quote-doubling-escapes? . #t)
;;     (whitespace-chars        . (#\space))))
;; @end lisp
;;
;; a derived spec with a different @code{separator-chars} attribute and an
;; added @code{comment-chars} attribute can be created like:
;;
;; @lisp
;; (%csv:csv-spec-derive my-first-csv-spec
;;                  '((separator-chars . (#\%))
;;                    (comment-chars   . (#\#))))
;; @result{}
;; ((separator-chars         . (#\%))
;;  (comment-chars           . (#\#))
;;  (newline-type            . lax)
;;  (quote-char              . #\")
;;  (quote-doubling-escapes? . #t)
;;  (whitespace-chars        . (#\space)))
;; @end lisp
;;
;; In that the yielded spec might share some structure with @var{orig-spec}
;; and/or @var{changes}.  Most applications will not use this procedure
;; directly.

(define (%csv:csv-spec-derive orig-spec changes)
  ;; TODO: Make this not share structure.  Error-check and normalize at the
  ;; same time we clone.
  (let ((new-spec '()))
    (let ((add-to-new-spec
           (lambda (alist)
             (for-each (lambda (cell)
                         (or (assq (car cell) new-spec)
                             (set! new-spec (cons cell new-spec))))
                       alist))))
      (add-to-new-spec changes)
      (add-to-new-spec orig-spec)
      (reverse new-spec))))

;;; @section Making Reader Makers

;;; CSV readers are procedures that are constructed dynamically to close over a
;;; particular CSV input and yield a parsed row value each time the procedure
;;; is applied.  For efficiency reasons, the reader procedures are themselves
;;; constructed by another procedure, @code{make-csv-reader-maker}, for
;;; particular CSV reader specs.

(define (%csv:csv-error code extra)
  ;; TODO: Maybe make the CSV error handler user-specifiable, or allow user to
  ;; specify some errors that should be disregarded.
  ;;
  ;; TODO: Add position information.  Keep track of character position while
  ;; reading.
  (%csv:error
   "[csv-reader]"
   (string-append "Erroneous CSV format: "
                  (case code
                    ((junk-after-quote-close)
                     "Junk after close of quoted field:")
                    (else (string-append "INTERNAL ERROR: Unknown code: "
                                         (symbol->string code)))))
   extra))

(define (%csv:newline-check-step0 newline-type c port)
  ;; (display "*DEBUG* (equal? newline-type 'lax) = ")
  ;; (write (equal? newline-type 'lax))
  ;; (newline)
  ;; (display "*DEBUG* (eqv? newline-type 'lax) = ")
  ;; (write (eqv? newline-type 'lax))
  ;; (newline)
  (case newline-type
    ((cr)   (eqv? c %csv:cr))
    ((lf)   (eqv? c %csv:lf))
    ((crlf) (if (eqv? c %csv:cr)
                (let ((c2 (peek-char port)))
                  (cond ((eof-object? c2)
                         ;; Note: This is a CR-EOF in an input that uses CR-LF
                         ;; for terminating records.  We are discarding the CR,
                         ;; so it will not be added to the field string.  We
                         ;; might want to signal an error.
                         #t)
                        ((eqv? c2 %csv:lf)
                         (read-char port)
                         #t)
                        (else #f)))
                #f))
    ((lax detect) (cond ((eqv? c %csv:cr)
                         (let ((c2 (peek-char port)))
                           (cond ((eof-object? c2) #t)
                                 ((eqv? c2 %csv:lf)
                                  (read-char port)
                                  'crlf)
                                 (else 'cr))))
                        ((eqv? c %csv:lf) 'lf)
                        (else #f)))
    (else (%csv:error
           "%csv:make-portreader/positional"
           "unrecognized newline-type"
           newline-type))))

(define %csv:make-portreader/positional
  (letrec-syntax
      ((newline-check
        (syntax-rules ()
          ((_ newline-type c port detected-newline-type)
           ;; Note: "port" and "detected-newline-type" must be identifiers.
           ;; "newline-type" and "c" must be identifiers or self-evals.
           (if (eqv? newline-type 'detect)
               (begin (set! detected-newline-type
                            (%csv:newline-check-step0 newline-type c port))
                      detected-newline-type)
               (%csv:newline-check-step0 newline-type c port)))))
       (gosc-cons
        ;; Note: This is to ensure the output string is gotten and closed
        ;; before consing it with the result of a recursive call.
        (syntax-rules ()
          ((_ os b) (let ((s (%csv:gosc os))) (cons s b))))))
    (lambda (newline-type
             separator-chars
             quote-char
             quote-doubling-escapes?
             comment-chars
             whitespace-chars
             strip-leading-whitespace?
             strip-trailing-whitespace?
             newlines-in-quotes?)
      (lambda (port)
        (let ((dnlt #f)
              (escape-char #\\))
          (let read-fields-or-eof ((c (read-char port)))
            (cond
             ((eof-object? c) '())
             ((and strip-leading-whitespace? (memv c whitespace-chars))
              ;; It's leading whitespace char when we're ignoring leading
              ;; whitespace in fields, and there might just be whitespace and
              ;; then an EOF, which should probably be considered just an EOF
              ;; rather than a row with one empty field, so just skip this
              ;; whitespace char.
              (read-fields-or-eof (read-char port)))
             ((and (not (null? comment-chars)) (memv c comment-chars))
              ;; It's a comment char in the first column (or in the first
              ;; non-whitespace column, if "strip-leading-whitespace?" is
              ;; true), so skip to end of line.
              (let ((fake-dnlt #f))
                (let loop ((c (read-char port)))
                  (cond ((eof-object? c) '())
                        ((newline-check newline-type c port fake-dnlt)
                         (read-fields-or-eof (read-char port)))
                        (else (loop (read-char port)))))))
             (else
              ;; It's not going to be just an EOF, so try to read a row.
              (let ((row
                     (let read-fields ((c c))
                       (cond
                        ;; If an EOF or newline in an unquoted field, consider
                        ;; the field and row finished.  (We don't consider EOF
                        ;; before newline to be an error, although perhaps that
                        ;; would be a useful check for a freak premature
                        ;; end-of-input when dealing with "well-formed" CSV).
                        ((or (eof-object? c)
                             (newline-check newline-type c port dnlt))
                         (list ""))
                        ;; If a field separator, finish this field and cons
                        ;; with value of recursive call to get the next field.
                        ((memv c separator-chars)
                         (cons "" (read-fields (read-char port))))
                        ;; If we're ignoring leading whitespace, and it's a
                        ;; whitespace-chars character, then recurse to keep
                        ;; finding the field start.
                        ((and strip-leading-whitespace?
                              (memv c whitespace-chars))
                         (read-fields (read-char port)))
                        ;; If a quote, read a quoted field.
                        ((and quote-char (eqv? c quote-char))
                         (let ((os (open-output-string)))
                           (let loop ((c (read-char port)))
                             (cond
                              ((or (eof-object? c)
                                   (and (not newlines-in-quotes?)
                                        (newline-check newline-type
                                                       c port dnlt)))
                               (list (%csv:gosc os)))
                              ((and escape-char (eqv? c escape-char))
                               ;FIXME can become unsynchronized
                               (write-char (read-char port) os)
                               (loop (read-char port)))
                              ((and quote-char (eqv? c quote-char))
                               (if quote-doubling-escapes?
                                   (let ((c2 (read-char port)))
                                     (if (eqv? c2 quote-char)
                                         (begin (write-char c2 os)
                                                (loop (read-char port)))
                                         (gosc-cons
                                          os
                                          (let skip-after ((c c2))
                                            (cond
                                             ((or (eof-object? c)
                                                  (newline-check
                                                   newline-type c port dnlt))
                                              '())
                                             ((memv c separator-chars)
                                              (read-fields (read-char port)))
                                             ((memv c whitespace-chars)
                                              ;; Note: We tolerate
                                              ;; whitespace after field
                                              ;; close quote even if
                                              ;; skip-trailing-whitespace?
                                              ;; is false.
                                              (skip-after (read-char port)))
                                             (else (%csv:csv-error
                                                    'junk-after-quote-close
                                                    c)))))))
                                   (gosc-cons os
                                              (read-fields (read-char port)))))
                              (else (write-char c os)
                                    (loop (read-char port)))))))
                        ;; It's the start of an unquoted field.
                        (else
                         (let ((os (open-output-string)))
                           (write-char c os)
                           (let loop ((c (read-char port)))
                             (cond
                              ((or (eof-object? c)
                                   (newline-check newline-type c port dnlt))
                               (list (get-output-string os)))
                              ((memv c separator-chars)
                               (gosc-cons os (read-fields (read-char port))))
                              ((and strip-trailing-whitespace?
                                    (memv c whitespace-chars))
                               ;; TODO: Maybe optimize to avoid creating a new
                               ;; output string every time we see whitespace.
                               ;; We could use a string collector with unwrite.
                               ;; And/or do lookahead to see whether whitespace
                               ;; is only one character.  Do this after we have
                               ;; a better regression test suite.
                               (let ((ws-os (open-output-string)))
                                 (write-char c ws-os)
                                 (let ws-loop ((c (read-char port)))
                                   (cond
                                    ((or (eof-object? c)
                                         (newline-check
                                          newline-type c port dnlt))
                                     (close-output-port ws-os)
                                     (list (%csv:gosc os)))
                                    ((memv c separator-chars)
                                     (close-output-port ws-os)
                                     (gosc-cons os (read-fields (read-char
                                                                 port))))
                                    ((memv c whitespace-chars)
                                     (write-char c ws-os)
                                     (ws-loop (read-char port)))
                                    (else
                                     (display (%csv:gosc ws-os) os)
                                     (write-char c os)
                                     (loop (read-char port)))))))
                              (else (write-char c os)
                                    (loop (read-char port)))))))))))
                (if (null? row)
                    row
                    (if (eq? newline-type 'detect)
                        (cons dnlt row)
                        row)))))))))))

(define %csv:make-portreader
  ;; TODO: Make a macro for the three times we list the spec attributes.
  (letrec ((pb (lambda (x) (if x #t #f)))
           (pc (lambda (x)
                 (cond ((char?   x) x)
                       ((string? x) (case (string-length x)
                                      ((1)  (string-ref  x 0))
                                      (else (%csv:type-error
                                             "make-csv-reader-maker"
                                             "character"
                                             x))))
                       (else (%csv:type-error "make-csv-reader-maker"
                                              "character"
                                              x)))))
           (pc-f (lambda (x)
                   (cond ((not     x) x)
                         ((char?   x) x)
                         ((string? x) (case (string-length x)
                                        ((0)  #f)
                                        ((1)  (string-ref  x 0))
                                        (else (%csv:type-error
                                               "make-csv-reader-maker"
                                               "character or #f"
                                               x))))
                         (else (%csv:type-error "make-csv-reader-maker"
                                                "character or #f"
                                                x)))))
           (pe (lambda (x acceptable)
                 (if (memq x acceptable)
                     x
                     (%csv:type-error
                      "make-csv-reader-maker"
                      (let ((os (open-output-string)))
                        (display "symbol from the set " os)
                        (write acceptable os)
                        (%csv:gosc os))
                      x))))
           (plc-n (lambda (x)
                    (or (list? x)
                        (%csv:type-error "make-csv-reader-maker"
                                         "list of characters"
                                         x))
                    (map pc x)))
           (plc (lambda (x)
                  (let ((result (plc-n x)))
                    (if (null? result)
                        (%csv:type-error "make-csv-reader-maker"
                                         "non-null list of characters"
                                         x)
                        result)))))
    (lambda (reader-spec)
      (let ((newline-type               'lax)
            (separator-chars            '(#\,))
            (quote-char                 #\")
            (quote-doubling-escapes?    #t)
            (comment-chars              '())
            (whitespace-chars           '(#\space))
            (strip-leading-whitespace?  #f)
            (strip-trailing-whitespace? #f)
            (newlines-in-quotes?        #t))
        ;; TODO: It's erroneous to have two entries for the same attribute in a
        ;; spec.  However, it would be nice if we error-detected duplicate
        ;; entries, or at least had assq semantics (first, rather than last,
        ;; wins).  Use csv-spec-derive's descendants for that.
        (for-each
         (lambda (item)
           (let ((v (cdr item)))
             (case (car item)
               ((newline-type)
                (set! newline-type (pe v '(cr crlf detect lax lf))))
               ((separator-chars)
                (set! separator-chars (plc v)))
               ((quote-char)
                (set! quote-char (pc-f v)))
               ((quote-doubling-escapes?)
                (set! quote-doubling-escapes? (pb v)))
               ((comment-chars)
                (set! comment-chars (plc-n v)))
               ((whitespace-chars)
                (set! whitespace-chars (plc-n v)))
               ((strip-leading-whitespace?)
                (set! strip-leading-whitespace?  (pb v)))
               ((strip-trailing-whitespace?)
                (set! strip-trailing-whitespace? (pb v)))
               ((newlines-in-quotes?)
                (set! newlines-in-quotes? (pb v))))))
         reader-spec)
        (%csv:make-portreader/positional
         newline-type
         separator-chars
         quote-char
         quote-doubling-escapes?
         comment-chars
         whitespace-chars
         strip-leading-whitespace?
         strip-trailing-whitespace?
         newlines-in-quotes?)))))

;;; @defproc make-csv-reader-maker reader-spec
;;;
;;; Constructs a CSV reader constructor procedure from the @var{reader-spec},
;;; with unspecified attributes having their default values.
;;;
;;; For example, given the input file @code{fruits.csv} with the content:
;;;
;;; @example
;;; apples  |  2 |  0.42
;;; bananas | 20 | 13.69
;;; @end example
;;;
;;; a reader for the file's apparent format can be constructed like:
;;;
;;; @lisp
;;; (define make-food-csv-reader
;;;   (make-csv-reader-maker
;;;    '((separator-chars            . (#\|))
;;;      (strip-leading-whitespace?  . #t)
;;;      (strip-trailing-whitespace? . #t))))
;;; @end lisp
;;;
;;; The resulting @code{make-food-csv-reader} procedure accepts one argument,
;;; which is either an input port from which to read, or a string from which to
;;; read.  Our example input file then can be be read by opening an input port
;;; on a file and using our new procedure to construct a reader on it:
;;;
;;; @lisp
;;; (define next-row
;;;   (make-food-csv-reader (open-input-file "fruits.csv")))
;;; @end lisp
;;;
;;; This reader, @code{next-row}, can then be called repeatedly to yield a
;;; parsed representation of each subsequent row.  The parsed format is a list
;;; of strings, one string for each column.  The null list is yielded to
;;; indicate that all rows have already been yielded.
;;;
;;; @lisp
;;; (next-row) @result{} ("apples" "2" "0.42")
;;; (next-row) @result{} ("bananas" "20" "13.69")
;;; (next-row) @result{} ()
;;; @end lisp

(define (make-csv-reader-maker reader-spec)
  (let ((make-portread
         (if (let ((p (assq 'newline-type reader-spec))) (and p (cdr p)))
             ;; Newline-adapting portreader-maker.
             (letrec
                 ((detect-portread
                   (%csv:make-portreader
                    (%csv:csv-spec-derive reader-spec
                                          '((newline-type . detect)))))
                  ;; TODO: The set of cr/crlf/lf newline-type portreaders are
                  ;; constructed optimistically right now for two reasons:
                  ;; 1. we don't yet sanitize reader-specs of shared structure
                  ;; that can be mutated behind our backs; 2. eventually, we
                  ;; want to add a "lots-o-shots?" argument that, when true,
                  ;; would do this anyway.  Consider.
                  (cr-portread
                   (%csv:make-portreader
                    (%csv:csv-spec-derive reader-spec
                                          '((newline-type . cr)))))
                  (crlf-portread
                   (%csv:make-portreader
                    (%csv:csv-spec-derive reader-spec
                                          '((newline-type . crlf)))))
                  (lf-portread
                   (%csv:make-portreader
                    (%csv:csv-spec-derive reader-spec
                                          '((newline-type . lf))))))
               (lambda ()
                 (let ((actual-portread #f))
                   (let ((adapt-portread
                          (lambda (port)
                            (let ((dnlt-row (detect-portread port)))
                              (if (null? dnlt-row)
                                  dnlt-row
                                  (begin (set! actual-portread
                                               (case (car dnlt-row)
                                                 ((cr)   cr-portread)
                                                 ((crlf) crlf-portread)
                                                 ((lf)   lf-portread)
                                                 (else   actual-portread)))
                                         (cdr dnlt-row)))))))
                     (set! actual-portread adapt-portread)
                     (lambda (port) (actual-portread port))))))
             ;; Stateless portreader-maker.
             (let ((reusable-portread
                    (%csv:make-portreader reader-spec)))
               (lambda () reusable-portread)))))
    (lambda (in)
      (let ((port     (%csv:in-arg "[csv-reader]" in))
            (portread (make-portread)))
        (lambda () (portread port))))))

;;; @section Making Readers

;;; In addition to being constructed from the result of
;;; @code{make-csv-reader-maker}, CSV readers can also be constructed using
;;; @code{make-csv-reader}.

;;; @defproc make-csv-reader in [reader-spec]
;;;
;;; Construct a CSV reader on the input @var{in}, which is an input port or a
;;; string.  If @var{reader-spec} is given, and is not the null list, then a
;;; ``one-shot'' reader constructor is constructed with that spec and used.  If
;;; @var{reader-spec} is not given, or is the null list, then the default CSV
;;; reader constructor is used.  For example, the reader from the
;;; @code{make-csv-reader-maker} example could alternatively have been
;;; constructed like:
;;;
;;; @lisp
;;; (define next-row
;;;   (make-csv-reader
;;;    (open-input-file "fruits.csv")
;;;    '((separator-chars            . (#\|))
;;;      (strip-leading-whitespace?  . #t)
;;;      (strip-trailing-whitespace? . #t))))
;;; @end lisp

(define make-csv-reader
  (let ((default-maker (make-csv-reader-maker '())))
    (lambda (in . rest)
      (let ((spec (cond ((null? rest)       '())
                        ((null? (cdr rest)) (car rest))
                        (else (%csv:error "make-csv-reader"
                                          "extraneous arguments"
                                          (cdr rest))))))
        ((if (null? spec)
             default-maker
             (make-csv-reader-maker spec))
         (%csv:in-arg "make-csv-reader" in))))))

;;; @section High-Level Conveniences

;;; Several convenience procedures are provided for iterating over the CSV rows
;;; and for converting the CSV to a list.
;;;
;;; To the dismay of some Scheme purists, each of these procedures accepts a
;;; @var{reader-or-in} argument, which can be a CSV reader, an input port, or a
;;; string.  If not a CSV reader, then the default reader constructor is used.
;;; For example, all three of the following are equivalent:
;;;
;;; @lisp
;;; (csv->list                                     STRING  )
;;; @equiv{}
;;; (csv->list (make-csv-reader                    STRING ))
;;; @equiv{}
;;; (csv->list (make-csv-reader (open-input-string STRING )))
;;; @end lisp

;;; @defproc csv-for-each proc reader-or-in
;;;
;;; Similar to Scheme's @code{for-each}, applies @var{proc}, a procedure of one
;;; argument, to each parsed CSV row in series.  @var{reader-or-in} is the CSV
;;; reader, input port, or string.  The return value is undefined.

;; TODO: Doc an example for this.

(define (csv-for-each proc reader-or-in)
  (let ((reader (%csv:reader-or-in-arg "csv-for-each" reader-or-in)))
    (let loop ((row (reader)))
      (or (null? row)
          (begin (proc row)
                 (loop (reader)))))))

;;; @defproc csv-map proc reader-or-in
;;;
;;; Similar to Scheme's @code{map}, applies @var{proc}, a procedure of one
;;; argument, to each parsed CSV row in series, and yields a list of the values
;;; of each application of @var{proc}, in order.  @var{reader-or-in} is the CSV
;;; reader, input port, or string.

;; TODO: Doc an example for this.

;; (define (csv-map proc reader-or-in)
;;   (let ((reader (%csv:reader-or-in-arg "csv-for-each" reader-or-in)))
;;     (let ((head '()))
;;       (let ((row (reader)))
;;         (if (null? row)
;;             head
;;             (let ((pair (cons (proc row) '())))
;;               (set! head pair)
;;               (let loop ((prior pair))
;;                 (let ((row (reader)))
;;                   (if (null? row)
;;                       head
;;                       (let ((pair (cons (proc row) '())))
;;                         (set-cdr! prior pair)
;;                         (loop pair)))))))))))

(define (csv-map proc reader-or-in)
  (let ((reader (%csv:reader-or-in-arg "csv-for-each" reader-or-in)))
    (let loop ((row (reader)) (ret null))
      (if (null? row)
          (reverse ret)
          (let ((ret (cons (proc row) ret)))
            (loop (reader) ret))))))

;;; @defproc csv->list reader-or-in
;;;
;;; Yields a list of CSV row lists from input @var{reader-or-in}, which is a
;;; CSV reader, input port, or string.

;; TODO: Doc an example for this.

;; (define (csv->list reader-or-in)
;;   (let ((reader (%csv:reader-or-in-arg "csv->list" reader-or-in)))
;;     (let ((head '()))
;;       (let ((row (reader)))
;;         (if (null? row)
;;             head
;;             (let ((pair (cons row '())))
;;               (set! head pair)
;;               (let loop ((prior pair))
;;                 (let ((row (reader)))
;;                   (if (null? row)
;;                       head
;;                       (let ((pair (cons row '())))
;;                         (set-cdr! prior pair)
;;                         (loop pair)))))))))))

(define (csv->list reader-or-in)
  (csv-map values reader-or-in))

;;; @section Converting CSV to SXML

;;; The @code{csv->sxml} procedure can be used to convert CSV to SXML format,
;;; for processing with various XML tools.

;;; @defproc csv->sxml reader-or-in [row-element [col-elements]]
;;;
;;; Reads CSV from input @var{reader-or-in} (which is a CSV reader, input port,
;;; or string), and yields an SXML representation.  If given, @var{row-element}
;;; is a symbol for the XML row element.  If @var{row-element} is not given,
;;; the default is the symbol @code{row}.  If given @var{col-elements} is a
;;; list of symbols for the XML column elements.  If not given, or there are
;;; more columns in a row than given symbols, column element symbols are of the
;;; format @code{col-@var{n}}, where @var{n} is the column number (the first
;;; column being number 0, not 1).
;;;
;;; For example, given a CSV-format file @code{friends.csv} that has the
;;; contents:
;;;
;;; @example
;;; Binoche,Ste. Brune,33-1-2-3
;;; Posey,Main St.,555-5309
;;; Ryder,Cellblock 9,
;;; @end example
;;;
;;; with elements not given, the result is:
;;;
;;; @lisp
;;; (csv->sxml (open-input-file "friends.csv"))
;;; @result{}
;;; (*TOP*
;;;  (row (col-0 "Binoche") (col-1 "Ste. Brune")  (col-2 "33-1-2-3"))
;;;  (row (col-0 "Posey")   (col-1 "Main St.")    (col-2 "555-5309"))
;;;  (row (col-0 "Ryder")   (col-1 "Cellblock 9") (col-2 "")))
;;; @end lisp
;;;
;;; With elements given, the result is like:
;;;
;;; @lisp
;;; (csv->sxml (open-input-file "friends.csv")
;;;            'friend
;;;            '(name address phone))
;;; @result{}
;;; (*TOP* (friend (name    "Binoche")
;;;                (address "Ste. Brune")
;;;                (phone   "33-1-2-3"))
;;;        (friend (name    "Posey")
;;;                (address "Main St.")
;;;                (phone   "555-5309"))
;;;        (friend (name    "Ryder")
;;;                (address "Cellblock 9")
;;;                (phone   "")))
;;; @end lisp

(define csv->sxml
  (let* ((top-symbol
          (string->symbol "*TOP*"))
         (make-col-symbol
          (lambda (n)
            (string->symbol (string-append "col-" (number->string n)))))
         (default-col-elements
           (let loop ((i 0))
             (if (= i 32) ; arbitrary magic number
                 '()
                 (cons (make-col-symbol i) (loop (+ 1 i)))))))
    ;; TODO: Have option to error when columns count doesn't match provided
    ;; column name list.
    (lambda (reader-or-in . rest)
      (let ((reader       (%csv:reader-or-in-arg "csv->sxml"
                                                 reader-or-in))
            (row-element  'row)
            (col-elements #f))
        ;; TODO: Maybe use case-lambda.
        (or (null? rest)
            (begin (set! row-element (car rest))
                   (let ((rest (cdr rest)))
                     (or (null? rest)
                         (begin (set! col-elements (car rest))
                                (let ((rest (cdr rest)))
                                  (or (null? rest)
                                      (%csv:error
                                       "csv->sxml"
                                       "extraneous arguments"
                                       rest))))))))
        ;; TODO: We could clone and grow default-col-elements for the duration
        ;; of this procedure.
        (cons top-symbol
              (csv-map (lambda (row)
                         (cons row-element
                               (let loop ((vals  row)
                                          (i     0)
                                          (names (or col-elements
                                                     default-col-elements)))
                                 (if (null? vals)
                                     '()
                                     (cons (list (if (null? names)
                                                     (make-col-symbol i)
                                                     (car names))
                                                 (car vals))
                                           (loop (cdr vals)
                                                 (+ 1 i)
                                                 (if (null? names)
                                                     '()
                                                     (cdr names))))))))
                       reader))))))

;; TODO: Make a define-csv-reader/positional, for great constant-folding.
;; That's part of the reason some things are done the way they are.

;; TODO: Make a csv-bind, as a newbie convenience for people without advanced
;; match forms, which looks good in examples.  This is better than a
;; csv-map/bind and a csv-for-each/bind.
;;
;; (csv-for-each/bind ((column-binding ...) body ...)
;;               { (else => closure) | (else body ...) | }
;;               input-port
;;               [ csv-reader ])
;;
;; (csv-for-each/bind
;;  ((lastname firstname email)
;;   ...)
;;  (else => (lambda (row) (error "CSV row didn't match pattern" row)))
;;  my-input-port
;;  my-csv-reader)

;; TODO: Handle escapes, once we find an actual example or specification of any
;; flavor of escapes in CSV other than quote-doubling inside a quoted field.

;; TODO: Add a spec attribute for treating adjacent separators as one, or
;; skipping empty fields.  This would probably only be used in practice for
;; parsing whitespace-separated input.

;; TODO: Get access to MS Excel or documentation, and make this correct.
;;
;; (define msexcel-csv-reader-spec
;;   '((newline-type               . crlf)
;;     (separator-chars            . (#\,))
;;     (quote-char                 . #\")
;;     (quote-doubling-escapes?    . #t)
;;     (comment-chars              . ())
;;     (whitespace-chars           . (#\space))
;;     (strip-leading-whitespace?  . #f)
;;     (strip-trailing-whitespace? . #f)
;;     (newlines-in-quotes?        . #t)))

;; TODO: Maybe put this back in.
;;
;; (define default-csv-reader-spec
;;   '((newline-type               . lax)
;;     (separator-chars            . (#\,))
;;     (quote-char                 . #\")
;;     (quote-doubling-escapes?    . #t)
;;     (comment-chars              . ())
;;     (whitespace-chars           . (#\space))
;;     (strip-leading-whitespace?  . #f)
;;     (strip-trailing-whitespace? . #f)
;;     (newlines-in-quotes?        . #t)))

;; TODO: Implement CSV writing, after CSV reading is field-tested and polished.

;; TODO: Call "close-input-port" once eof-object is hit, but make sure we still
;; can return an empty list on subsequent calls to the CSV reader.

;; TODO: Consider switching back to returning eof-object at the end of input.
;; We originally changed to returning the null list because we might want to
;; synthesize the EOF, and there is no R5RS binding for the eof-object.

;; TODO: [2005-12-09] In one test, Guile has a stack overflow when parsing a
;; row with 425 columns.  Wouldn't hurt to see if we can make things more
;; tail-recursive.

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.10 -- 2010-04-13 -- PLaneT @code{(1 6)}
;;; Documentation fix.
;;;
;;; @item Version 0.9 -- 2009-03-14 -- PLaneT @code{(1 5)}
;;; Documentation fix.
;;;
;;; @item Version 0.8 -- 2009-02-23 -- PLaneT @code{(1 4)}
;;; Documentation changes.
;;;
;;; @item Version 0.7 -- 2009-02-22 -- PLaneT @code{(1 3)}
;;; License is now LGPL 3.  Moved to author's new Scheme administration system.
;;;
;;; @item Version 0.6 -- 2008-08-12 -- PLaneT @code{(1 2)}
;;; For PLT 4 compatibility, new versions of @code{csv-map} and
;;; @code{csv->list} that don't use @code{set-cdr!} (courtesy of Doug
;;; Orleans). PLT 4 @code{if} compatibility change.  Minor documentation fixes.
;;;
;;; @item Version 0.5 --- 2005-12-09
;;; Changed a non-R5RS use of @code{letrec} to @code{let*}, caught by Guile and
;;; David Pirotte.
;;;
;;; @item Version 0.4 --- 2005-06-07
;;; Converted to Testeez.  Minor documentation changes.
;;;
;;; @item Version 0.3 --- 2004-07-21
;;; Minor documentation changes.  Test suite now disabled by default.
;;;
;;; @item Version 0.2 --- 2004-06-01
;;; Work-around for @code{case}-related bug observed in Gauche 0.8 and 0.7.4.2
;;; that was tickled by @code{csv-internal:make-portreader/positional}.  Thanks
;;; to Grzegorz Chrupa@l{}a for reporting.
;;;
;;; @item Version 0.1 --- 2004-05-31
;;; First release, for testing with real-world input.
;;;
;;; @end table

#;(provide
 csv->list
 csv->sxml
 csv-for-each
 csv-map
 make-csv-reader
 make-csv-reader-maker)