;; jfilter.scm (Japanese character code conversion Filters) ;; V 2.1 ;; Copyright (C) 1997, 1998, 2002 May 19. Dai INUKAI ;; Author: Dai INUKAI (inukai.d@jeans.ocn.ne.jp) ;; ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2 of the License, or ;; any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this program; if not, write to the Free Software ;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ;; このプログラムは役に立つことを願って、GNU General Public Licenseに ;; したがって一切の保証なしに配布されています。 ;; 詳しくはGNU General Public Licenseをごらん下さい。 ;; ;; The BIT twiddling operations are provided by SLIB:LOGICAL ;; Comment out when compiling with HOBBIT (require 'logical) ;; ;; logsleft and logsright are defined in scmhob.scm ;; Comment out when compiling with HOBBIT (define (logsleft x y) (ash x y)) (define (logsright x y) (ash x (- 0 y))) ;; ;; ;; Pocedures helping to convert 3 different Japanese character codes ;; JIS(ISO2022), Japanese EUC, Shift-JIS each other. ;; ;; ;;CHARACTERS not defined in R5RS ;; (define CHAR:ESCAPE (integer->char #X1B)) (define CHAR:RETURN (integer->char #X0D)) (define (jcccf:esc . chars) (cons CHAR:ESCAPE chars)) ;; ;; ;; JIS(ISO2022) escape sequences ;; (define jcccf:ascii (jcccf:esc #\( #\B)) ;#x1B #x28 #x42 (define jcccf:roman (jcccf:esc #\( #\J)) ;#x1B #x28 #x4a (define jcccf:x0201 (jcccf:esc #\( #\I)) ;#x1B #x28 #x49 (define jcccf:latin1 (jcccf:esc #\- #\A)) ;#x1B #x2D #x41 (define jcccf:x0208-1978 (jcccf:esc #\$ #\@)) ;#x1B #x24 #x40 (define jcccf:x0208-1983 (jcccf:esc #\$ #\B)) ;#x1B #x24 #x42 (define jcccf:x0208-1978-2 (jcccf:esc #\$ #\( #\@)) ;#x1B #x24 #x28 #x40 (define jcccf:x0208-1983-2 (jcccf:esc #\$ #\( #\B)) ;#x1B #x24 #x28 #x42 ;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;; JCCCF PROCEDURES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (eucj1? c) ; "returns #t if the char c is the 1st byte of euc 2-byte char, otherwise #f." (let ((c (char->integer c))) (and (>= c #x8E ) (<= c #xFE) (not (= c #x8F)) (not (= c #xA0))))) (define (eucj2? c) ; "returns #t if the char c is the 2nd byte of euc 2-byte char, otherwise #f." (let ((c (char->integer c))) (and (>= c #xA0) (<= c #xFE)))) (define (sjis1? c) ; "returns #t if the char c is the 1st byte of shift-jis 2-byte char, otherwise #f." (let ((c (char->integer c))) (or (and (>= c #x81) (<= c #x9F)) (and (>= c #xE0) (<= c #xFC)) ))) (define (sjis2? c) ; "returns #t if the char c is the 2nd byte of shift-jis 2-byte char, otherwise #f." (let ((c (char->integer c))) (and (>= c #x40) (<= c #xFC) (not (= c #x7F))))) (define (hankana? c) ; "returns #t if the char is hankaku character." (let ((c (char->integer c))) (and (>= c #xA0) (<= c #xDF)))) (define (upper-byte w) ; "Returns upper byte of the 2 bytes integer w." (logand (logsright w 8) #xFF)) (define (lower-byte w) ; "Returns lower byte of the 2 bytes integer w." (logand w #xFF)) (define (byte->word uw lw) ; "Returns 2 bytes integer whose upper char code is uw and lower char code lw." (logior (logsleft (char->integer uw) 8) (char->integer lw))) ;; ;; ;; HANKAKU to ZENKAKU ;; han->zen returns a list of ZENKAKU characters (define (han->zen s to-code) ; "Returns 2 byte ZENKAKU character list in to-code, corresponding to hankaku character s. s is modified." ;; hankaku-character constants (define hankana-s '#(#xA7 #xB1 #xA8 #xB2 #xA9 #xB3 #xAA #xB4 #xAB #xB5 #xB6 #xB6DE #xB7 #xB7DE #xB8 #xB8DE #xB9 #xB9DE #xBA #xBADE #xBB #xBBDE #xBC #xBCDE #xBD #xBDDE #xBE #xBEDE #xBF #xBFDE #xC0 #xC0DE #xC1 #xC1DE #xAF #xC2 #xC2DE #xC3 #xC3DE #xC4 #xC4DE #xC5 #xC6 #xC7 #xC8 #xC9 #xCA #xCADE #xCADF #xCB #xCBDE #xCBDF #xCC #xCCDE #xCCDF #xCD #xCDDE #xCDDF #xCE #xCEDE #xCEDF #xCF #xD0 #x20 #xD1 #xD2 #xD3 #xAC #xD4 #xAD #xD5 #xAE #xD6 #xD7 #xD8 #xD9 #xDA #xDB #x20 #xDC #xB2 #xB4 #xA6 #xDD #xB3DE)) (define hankana-j '#(#xA7 #xB1 #xA8 #xB2 #xA9 #xB3 #xAA #xB4 #xAB #xB5 #xB6 #xB6DE #xB7 #xB7DE #xB8 #xB8DE #xB9 #xB9DE #xBA #xBADE #xBB #xBBDE #xBC #xBCDE #xBD #xBDDE #xBE #xBEDE #xBF #xBFDE #xC0 #xC0DE #xC1 #xC1DE #xAF #xC2 #xC2DE #xC3 #xC3DE #xC4 #xC4DE #xC5 #xC6 #xC7 #xC8 #xC9 #xCA #xCADE #xCADF #xCB #xCBDE #xCBDF #xCC #xCCDE #xCCDF #xCD #xCDDE #xCDDF #xCE #xCEDE #xCEDF #xCF #xD0 #xD1 #xD2 #xD3 #xAC #xD4 #xAD #xD5 #xAE #xD6 #xD7 #xD8 #xD9 #xDA #xDB #x20 #xDC #xB2 #xB4 #xA6 #xDD #xB3DE)) (let* ( (zen 0) (hankana (if (eq? to-code 'sjis) hankana-s hankana-j)) (zenkana-a (case to-code ((jis) #x2521) ((sjis) #x8340) ((eucj) #xA5A1))) (prefix (case to-code ((jis) (integer->char #x21)) ((sjis) (integer->char #x81)) ((eucj) (integer->char #xA1)))) (c (cond ((= 2 (length s))(byte->word (car s)(cadr s))) (else (char->integer (car s))))) ) (case c ;;characters other than katakana but can appear in its context ((#xA1) ;。 (set-car! s prefix) (set-cdr! s (list (case to-code ((jis) (integer->char #x23)) ((sjis) (integer->char #x42)) ((eucj) (integer->char #xa3))))) s ) ((#xA2) ;「 (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x56)) ((sjis)(integer->char #x75)) ((eucj)(integer->char #xD6))))) s ) ((#xA3) ;」 (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x57)) ((sjis)(integer->char #x76)) ((eucj)(integer->char #xD7))))) s ) ((#xA4) ;、 (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x21)) ((sjis)(integer->char #x41)) ((eucj)(integer->char #xA2))))) s ) ((#xA5) ;・ (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x26)) ((sjis)(integer->char #x45)) ((eucj)(integer->char #xA6))))) s ) ((#xB0) ;ー (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x3c)) ((sjis)(integer->char #x5B)) ((eucj)(integer->char #xBC))))) s ) ((#x2D) ;ー (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x5D)) ((sjis)(integer->char #x5C)) ((eucj)(integer->char #xDD))))) s ) ((#xDE) ;゛ (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x2B)) ((sjis)(integer->char #x4A)) ((eucj)(integer->char #xAB))))) s ) ((#xDF) ;゜ (set-car! s prefix) (set-cdr! s (list (case to-code ((jis)(integer->char #x2C)) ((sjis)(integer->char #x4B)) ((eucj)(integer->char #xAC))))) s ) (else ;; hankaku (do ((i 0 (+ i 1))) ((or (>= i (vector-length hankana))(= c (vector-ref hankana i))) (cond ((>= i (vector-length hankana)) (set-car! s #\?) (set-cdr! s (list #\?)) s ;(slib:error "No corresponding chraacter exists!" c); for debug ) (else (set! zen (+ zenkana-a i)) (set-car! s (integer->char (upper-byte zen))) (set-cdr! s (list (integer->char (lower-byte zen)))) s ))) ) ;do-jis-case ) ;case-else ) ;case ) ;let ) ;define ; ; JIS to SJS (define (jis->sjis s) ; "Converts jis character list s into shift-jis character list and returns it." (let* ( (cu (char->integer (car s ))) (cl (if (null? (cdr s)) '() (char->integer (cadr s)))) ) (cond ((null? cl) (if (<= cu #x7E) s (han->zen s 'sjis))) ((and (hankana? (car s )) (or (eq? cl #xDE)(eq? cl #xDF))) (han->zen s 'sjis)) (else (if (logbit? 0 cu) (if (< cl #x60) (set! cl (+ cl #x1f)) (set! cl (+ cl #x20))) (set! cl (+ cl #x7e))) (if (< cu #x5f) (set! cu (logsright (+ cu #xe1) 1)) (set! cu (logsright (+ cu #x161) 1))) (set-car! s (integer->char cu)) (set-cdr! s (list (integer->char cl))) s );else ))) ;Shift JIS to JIS (define (sjis->jis s) ; "Converts shift-jis char list s to jis char list and returns it." (let* ( (cu (char->integer (car s ))) (cl (if (null? (cdr s)) '() (char->integer (cadr s)))) ) (cond ((null? cl) (if (<= cu #x7E) s (han->zen s 'jis))) ((and (hankana? (car s)) (or (eq? cl #xDE)(eq? cl #xDF))) (han->zen s 'jis)) (else (if (< cu #xA0) (set! cu (- cu #x70)) (if (< cu #xF0) (set! cu (- cu #xB0)))) (if (>= cl #x80) (set! cl (- cl 1))) (set! cu (logsleft cu 1)) (if (>= cl #x9E) (set! cl (- cl #x5E)) (set! cu (- cu 1))) (set! cl (- cl #x1F)) (set-car! s (integer->char cu)) (set-cdr! s (list (integer->char cl))) s )))) ;; ;;JIS to EUC (define (jis->eucj s) ; "Converts s from jis to Japanese euc and returns it." (let* ( (cu (char->integer (car s))) (cl (if (null? (cdr s)) '() (char->integer (cadr s)))) (c (if (null? cl) (char->integer (car s)) (byte->word (car s)(cadr s)))) ) (cond ((null? cl) (if (<= cu #x7E) s (han->zen s 'eucj))) ((and (hankana? (car s)) (or (= cl #xDE)(= cl #xDF))) (han->zen s 'eucj)) (else (set! c (logior c #x8080)) (set-car! s (integer->char (upper-byte c))) (set-cdr! s (list (integer->char (lower-byte c)))) s )))) ;; ;; EUC to JIS (define (eucj->jis s) ; "Converts max 4 bytes Japanese euc character list to jis and returns them." (cond ((= 4 (length s)) (set-car! s (cadr s)) (set-cdr! s (cdddr s)) (han->zen s 'jis)) ((= #x8E (char->integer (car s))) (set-car! s (cadr s)) (set-cdr! s '()) (han->zen s 'jis)) ((= 1 (length s)) s) (else (let ((cu (char->integer (car s))) (cl (char->integer (cadr s)))) (set-car! s (integer->char (logand cu #x7F))) (set-cdr! s (list (integer->char (logand cl #x7F)))) s )))) ;; ;; ;;SJIS to EUC (define (sjis->eucj s) ; "Converts sjis character list to Japanese euc and returns it." (cond ((hankana? (car s)) (han->zen s 'eucj)) ((null? (cdr s)) s) (else (sjis->jis s) (let ((c1 (char->integer (car s))) (c2 (char->integer (cadr s)))) (set! c1 (logior #x80 c1)) (set! c2 (logior #x80 c2)) (set-car! s (integer->char c1)) (set-cdr! s (list (integer->char c2))) s );let );else )) (define (eucj->sjis s) ; "Converts Japanese euc character to sjis and returns it" (cond ((= 4 (length s)) (set-car! s (cadr s)) (set-cdr! s (cdddr s)) (han->zen s 'sjis)) ((= #x8E (char->integer (car s))) (set-car! s (cadr s)) (set-cdr! s '()) (han->zen s 'sjis)) ((= 1 (length s)) s) (else (jis->sjis (eucj->jis s))))) (define (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han?) ; "Converts character list in from-code into to-code and returns it." (define alpun-e '#( #xa1a1 #xa1a4 #xa1a5 #xa1a7 #xa1a8 #xa1a9 #xa1aa #xa1ab ;; ,.:;?!"^ #xa1b0 #xa1b1 #xa1b2 #xa1bd #xa1bf #xa1c3 #xa1c6 #xa1c7 ;;~_-/|`' #xa1ca #xa1cb #xa1ce #xa1cf #xa1d0 #xa1d1 #xa1d2 #xa1d3 ;;()[]{}<> #xa1dc #xa1dd #xa1e1 #xa1e3 #xa1e4 #xa1f0 #xa1f3 #xa1f4 ;;+-=<>$%# #xa1f5 #xa1f6 #xa1f7 ;;&*@ #xa3b0 #xa3b1 #xa3b2 #xa3b3 #xa3b4 #xa3b5 #xa3b6 #xa3b7 ;0..9A.. #xa3b8 #xa3b9 #xa3c1 #xa3c2 #xa3c3 #xa3c4 #xa3c5 #xa3c6 ;Za..z #xa3c7 #xa3c8 #xa3c9 #xa3ca #xa3cb #xa3cc #xa3cd #xa3ce #xa3cf #xa3d0 #xa3d1 #xa3d2 #xa3d3 #xa3d4 #xa3d5 #xa3d6 #xa3d7 #xa3d8 #xa3d9 #xa3da #xa3e1 #xa3e2 #xa3e3 #xa3e4 #xa3e5 #xa3e6 #xa3e7 #xa3e8 #xa3e9 #xa3ea #xa3eb #xa3ec #xa3ed #xa3ee #xa3ef #xa3f0 #xa3f1 #xa3f2 #xa3f3 #xa3f4 #xa3f5 #xa3f6 #xa3f7 #xa3f8 #xa3f9 #xa3fa )) (define alpun-j '#( #x2121 #x2124 #x2125 #x2127 #x2128 #x2129 #x212a #x212b #x2130 #x2131 #x2132 #x213d #x213f #x2143 #x2146 #x2147 #x214a #x214b #x214e #x214f #x2150 #x2151 #x2152 #x2153 #x215c #x215d #x2161 #x2163 #x2164 #x2170 #x2173 #x2174 #x2175 #x2176 #x2177 #x2330 #x2331 #x2332 #x2333 #x2334 #x2335 #x2336 #x2337 #x2338 #x2339 #x2341 #x2342 #x2343 #x2344 #x2345 #x2346 #x2347 #x2348 #x2349 #x234a #x234b #x234c #x234d #x234e #x234f #x2350 #x2351 #x2352 #x2353 #x2354 #x2355 #x2356 #x2357 #x2358 #x2359 #x235a #x2361 #x2362 #x2363 #x2364 #x2365 #x2366 #x2367 #x2368 #x2369 #x236a #x236b #x236c #x236d #x236e #x236f #x2370 #x2371 #x2372 #x2373 #x2374 #x2375 #x2376 #x2377 #x2378 #x2379 #x237a )) (define alpun-s '#( #x8140 #x8143 #x8144 #x8146 #x8147 #x8148 #x8149 #x814a #x814f #x8150 #x8151 #x815c #x815e #x8162 #x8165 #x8166 #x8169 #x816a #x816d #x816e #x816f #x8170 #x8171 #x8172 #x817b #x817c #x8181 #x8183 #x8184 #x8190 #x8193 #x8194 #x8195 #x8196 #x8197 #x824f #x8250 #x8251 #x8252 #x8253 #x8254 #x8255 #x8256 #x8257 #x8258 #x8260 #x8261 #x8262 #x8263 #x8264 #x8265 #x8266 #x8267 #x8268 #x8269 #x826a #x826b #x826c #x826d #x826e #x826f #x8270 #x8271 #x8272 #x8273 #x8274 #x8275 #x8276 #x8277 #x8278 #x8279 #x8281 #x8282 #x8283 #x8284 #x8285 #x8286 #x8287 #x8288 #x8289 #x828a #x828b #x828c #x828d #x828e #x828f #x8290 #x8291 #x8292 #x8293 #x8294 #x8295 #x8296 #x8297 #x8298 #x8299 #x829a )) (define han-char '#( #\space #\, #\. #\: #\; #\? #\! #\" #\^ #\~ #\_ #\- #\/ #\| #\` #\' #\( #\) #\[ #\] #\{ #\} #\< #\> #\+ #\- #\= #\< #\> #\$ #\% #\# #\& #\* #\@ #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z )) (define (zen->han char-list char-vector cur-sequence prev-sequence code) (cond ((null? (cdr char-list)) (if (not (equal? cur-sequence jcccf:ascii)) (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) char-list)) (else (let ((n (byte->word (car char-list) (cadr char-list))) (mid #f)) (let loop ((low 0) (high (- (vector-length char-vector) 1))) (set! mid (quotient (+ low high) 2)) (cond ((> low high) char-list) ((< n (vector-ref char-vector mid)) (loop low (- mid 1))) ((> n (vector-ref char-vector mid)) (loop (+ mid 1) high)) (else (let ((new-char (vector-ref han-char mid))) (set-car! char-list new-char) (set-cdr! char-list ()) (if (not (equal? cur-sequence jcccf:ascii)) (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii)) char-list)))))))) (cond ((null? s) s) (else (if zen2han? (zen->han s (case from-code ((jis) alpun-j) ((sjis) alpun-s) ((eucj) alpun-e)) cur-sequence prev-sequence from-code)) (case from-code ((jis) (case to-code ((jis) (if (equal? prev-sequence cur-sequence) s (let ((tmp (append cur-sequence s ()))) (set-car! s (car tmp)) (set-cdr! s (cdr tmp)) s))) ((sjis) (if (and add-cr (= 1 (length s)) (char=? #\newline (car s))) (begin (set-car! s CHAR:RETURN) (set-cdr! s (list #\newline)) s) (jis->sjis s))) ((eucj) (jis->eucj s)))) ((sjis) (case to-code ((sjis) s) ((jis) (if (equal? prev-sequence cur-sequence) (sjis->jis s) (let ((tmp (append cur-sequence (sjis->jis s) ()))) (set-car! s (car tmp)) (set-cdr! s (cdr tmp)) s ))) ((eucj) (sjis->eucj s)))) ((eucj) (case to-code ((eucj) s) ((jis) (if (equal? prev-sequence cur-sequence) (eucj->jis s) (let ((tmp (append cur-sequence (eucj->jis s) ()))) (set-car! s (car tmp)) (set-cdr! s (cdr tmp)) s ))) ((sjis) (if (and add-cr (= 1 (length s)) (char=? #\newline (car s))) (begin (set-car! s CHAR:RETURN) (set-cdr! s (list #\newline)) s) (eucj->sjis s))))) (else s))))) ;; ;; Writes character list s to the port ;; (define (jcccf:write-list s port) ; "Writes character list s to port." (do ((char-list s (cdr char-list))) ((null? char-list)) (write-char (car char-list) port))) (define (judge-file input . c-length) ; "Judges the code of input reading upto c-length characters if it exists." (let ( (inport (if (string? input) (open-input-file input) input)) (check-length (if (or (null? c-length)(not (number? (car c-length)))) 5000 (inexact->exact (car c-length)))) (jis-char 0) (sjis-char 0) (euc-char 0) (bin-char 0) (ascii-char 0) ) (let loop ( (s (list (read-char inport))) (chars 0) (done #f) ) (cond ((or (eof-object? (list-ref s (- (length s) 1))) (>= chars check-length) done) (close-input-port inport) (cond ((> bin-char 20) (string->symbol "binary")) ((positive? jis-char) (string->symbol "jis")) ((> euc-char sjis-char) (string->symbol "eucj")) ((> sjis-char euc-char) (string->symbol "sjis")) (else (string->symbol "ascii")))) ((memq (char->integer (car s)) '(#x00 #x01 #x02 #x03 #x04 #x05 #x06 #x07)) (set! bin-char (+ bin-char 1)) (if (> bin-char 20) (loop s chars #t) (loop (list (read-char inport)) (+ chars 1) #f))) ((char=? (car s) CHAR:ESCAPE);; is it JIS escape sequence? (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s (+ chars 1) #f)) (case (cadr s) ((#\() (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s (+ chars 1) #f)) (case (caddr s) ((#\B #\J #\I) (set! jis-char (+ jis-char 1)) (loop s chars #t)) (else ;; no, rewind file (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f))))) ((#\-) (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s (+ chars 1) #f)) (case (caddr s) ((#\A) (set! jis-char (+ jis-char 1)) (loop s chars #t)) (else ;; no, rewind file (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f))))) ((#\$) (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s (+ chars 1) #f)) (case (caddr s) ((#\() (if (null? (cdddr s)) (begin (set-cdr! (cddr s) (list (read-char inport))) (loop s (+ chars 1) #f)) (case (cadddr s) ((#\@ #\B) (set! jis-char (+ jis-char 1)) (loop s chars #t)) (else ;; no, rewind file (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f))))) ((#\@ #\B) (set! jis-char (+ jis-char 1)) (loop s chars #t)) (else ;; no, rewind file (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f))))) (else (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) );case ) );#x1b ((or (eucj1? (car s))(sjis1? (car s))) (if (null? (cdr s));;is it really the 1st byte? (begin (set-cdr! s (list (read-char inport))) (loop s (+ chars 1) #f)) (cond ((and (eucj1? (car s))(eucj2? (cadr s))) (set! euc-char (+ euc-char 1)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) ((and (sjis1? (car s))(sjis2? (cadr s))) (set! sjis-char (+ sjis-char 1)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) (else ;; no, go ahead (set! ascii-char (+ ascii-char 2)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) );cond );if ); ((char<=? (car s) #\~) (set! ascii-char (+ ascii-char 1)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) (else (set! bin-char (+ bin-char 1)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s (+ chars 1) #f)) );cond );let loop );let );judge-file ;; ;jcccf:set-sequence! ; (define (jcccf:set-sequence! cur-sequence prev-sequence sequence) (set-car! prev-sequence (car cur-sequence)) (set-cdr! prev-sequence (cdr cur-sequence)) (set-car! cur-sequence (car sequence)) (set-cdr! cur-sequence (cdr sequence))) ;; ;; cv-file ;; ;;(define (cv-file input output from-code to-code remove-cr add-cr check-length zen2han?) (define (cv-file input output from-code to-code . options) ; "Writes an entire file to outport, conversion made into to-code , the original file being written in from-code. Options provided for adding or removing cr character if Microsoft JIS is involved in the conversion." (let* ( (remove-cr (or (null? options) ;; default is #t (and (boolean? (car options)) (car options)))) (add-cr (and (not (null? options)) ;; default is #f (not (null? (cdr options))) (boolean? (cadr options)) (cadr options))) (check-length (or (null? options) ;; default is 5000 (null? (cdr options)) (null? (cddr options)) (caddr options))) (zen2han? (and (not (null? options)) ;; default is #f (not (null? (cdr options))) (not (null? (cddr options))) (not (null? (cdddr options))) (boolean? (cadddr options)) (cadddr options))) (from-code (if (or (null? from-code) (not from-code)) (judge-file input check-length) from-code)) (to-code (if (or (null? to-code) (not to-code)) 'eucj to-code)) (inport (cond ((or (null? input)(not input)) (current-input-port)) ((string? input)(open-input-file input)))) (outport (cond ((or (null? output)(not output))(current-output-port)) ((string? output)(open-output-file output))))) (jfilter:cv inport outport from-code to-code remove-cr add-cr zen2han?) (if (string? input)(close-input-port inport)) (if (string? output)(close-output-port outport)))) ;; ;; ;;cv : body of cv-file and cv-string ;; (define (jfilter:cv inport outport from-code to-code remove-cr add-cr zen2han?) (let loop ( (s (list (read-char inport))) (cur-sequence (list (list))) (prev-sequence (list (list))) ) (cond ((eof-object? (list-ref s (- (length s) 1))) (if (= 1 (length s))(set! s '()) (do ((lst s (cdr lst))) ((eof-object? (cadr lst)) (set-cdr! lst '())))) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport) (if (and (eq? 'jis to-code) (not (or (equal? cur-sequence jcccf:ascii) (equal? cur-sequence jcccf:roman)))) (jcccf:write-list jcccf:ascii outport)) ; (jcccf:write-list (append jcccf:ascii (list #\newline)) outport)) ); eof-object? (else (case from-code ((jis) (cond ((char=? (car s) CHAR:ESCAPE) ;;Is it escape sequence? (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (cadr s) ((#\() (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (caddr s) ((#\B #\J #\I) (jcccf:set-sequence! cur-sequence prev-sequence (case (caddr s) ((#\B) jcccf:ascii) ((#\J) jcccf:roman) ((#\I) jcccf:x0201) );case );jcccf:set-sequence! (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) ) (else (slib:error "Sequence not supported!" s) );else );case );if );#\open-paren ((#\-) (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (caddr s) ((#\A) (jcccf:set-sequence! cur-sequence prev-sequence jcccf:latin1) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) (else (slib:error "Sequence not supported!" s) );else );case );if );#\- ((#\$) (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (caddr s) ((#\() (if (null? (cdddr s)) (begin (set-cdr! (cddr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (cadddr s) ((#\@ #\B) (jcccf:set-sequence! cur-sequence prev-sequence (case (cadddr s) ((#\@) jcccf:x0208-1978-2) ((#\B) jcccf:x0208-1983-2) );case );jcccf:set-sequence! (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) ) (else (slib:error "Sequence not supported!" s) );else );case );if );#\( (else (case (caddr s) ((#\@ #\B) (jcccf:set-sequence! cur-sequence prev-sequence (case (caddr s) ((#\@) jcccf:x0208-1978) ((#\B) jcccf:x0208-1983) );case );jcccf:set-sequence! (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) ) (else (slib:error "Sequence not supported!" s) );else );case );else );case );if );#\$ (else (slib:error "Sequence not supported!" s) );else );case );if );CHAR:ESCAPE (else ; not CHAR:ESCAPE (cond ( (or (equal? cur-sequence jcccf:x0208-1978) (equal? cur-sequence jcccf:x0208-1983) (equal? cur-sequence jcccf:x0208-1978-2) (equal? cur-sequence jcccf:x0208-1983-2)) (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (begin (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (jcccf:set-sequence! cur-sequence prev-sequence cur-sequence) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );begin );if );or (else ;; ascii (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (jcccf:set-sequence! cur-sequence prev-sequence cur-sequence) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );else );cond );else ; not CHAR:ESCAPE );case );(jis) ((sjis) (cond ((hankana? (car s)) (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (cond ;sonant mark for voiced consonant ((or (= (char->integer (cadr s)) #xDE) (= (char->integer (cadr s)) #xDF)) (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );or (else ;;not voiced consonant but HANKAKU (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert (list (car s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (cadr s)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );else );cond );if );hankana? ((sjis1? (car s)) ;;Is it really the 1st byte? (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (cond ((sjis2? (cadr s)) (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );sjis2? (else ;;no, it's ascii (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert (list (car s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (cadr s)) (set-cdr! s '()) (loop s cur-sequence prev-sequence) );else );cond );if );sjis1? (else ;;ascii (cond ((and remove-cr (char=? (car s) CHAR:RETURN)) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) (else (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) );cond );else );cond );sjis ((eucj) (cond ((= (char->integer (car s)) #x8E) ;;HANKAKU katakana (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (cond ((hankana? (cadr s)) (if (null? (cddr s)) (begin (set-cdr! (cdr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (cond ((= (char->integer (caddr s)) #x8E) (if (null? (cdddr s)) (begin (set-cdr! (cddr s) (list (read-char inport))) (loop s cur-sequence prev-sequence)) (case (char->integer (cadddr s)) ((#xDE #xDF) ;; and it is a voiced consonant (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) (else ;;no voiced consonant. (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert (list (car s)(cadr s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (caddr s)) (set-cdr! s (cdddr s)) (loop s cur-sequence prev-sequence)) );case );if );#x8E (else ;not #x8E (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert (list (car s)(cadr s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (caddr s)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) );cond ); if );hankana? (else ;ascii (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert (list (car s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (cadr s)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) );cond );if );#x8e ((eucj1? (car s)) ;; is it really 1st byte? (if (null? (cdr s)) (begin (set-cdr! s (list (read-char inport))) (loop s cur-sequence prev-sequence)) (cond ((eucj2? (cadr s)) ;; yes (jcccf:set-sequence! cur-sequence prev-sequence jcccf:x0208-1983) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) (else ;; no, (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert (list (car s)) from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (cadr s)) (set-cdr! s ()) (loop s cur-sequence prev-sequence)) );cond );if );eucj1? (else ;; ascii (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence)) );cond );euc (else ;; ascii (jcccf:set-sequence! cur-sequence prev-sequence jcccf:ascii) (jcccf:write-list (jcccf:convert s from-code to-code prev-sequence cur-sequence add-cr zen2han? ) outport ) (set-car! s (read-char inport)) (set-cdr! s '()) (loop s cur-sequence prev-sequence))))))) ;; ;; cv-string : converts Scheme string s from from-code to to-code ;; and returns the converted string. ;; The bridge between file and string for cv-string ;; comment out when compiling with hobbit (require 'string-port) ;; ;; comment out the following procedure when compiling with HOBBIT ;; and execute it by SCM interpreter ;; (define (cv-string s from-code to-code) (call-with-output-string (lambda (outport) (call-with-input-string s (lambda (inport) (jfilter:cv inport outport from-code to-code #t #f)))))) (provide 'jfilter)