;;; cl-io.el --- Common Lisp style buffered file/stream input/output for Emacs Lisp ;; Copyright (C) 2010 Free Software Foundation, Inc. ;; Author: Devon Sean McCullough ;; Version: 0 ;; DOCS ;; Common Lispy OPEN, CLOSE, WITH-OPEN-STREAM, WITH-OPEN-FILE, ;; and READ-LINE with buffered i/o to support huge files. ;; Works with READ and similar existing operations. ;; Tested with Emacs versions 21, 22 and 23. ;; If you have an earlier Emacs, ;; please report what the *Messages* ;; buffer says when you byte compile this file. ;; BUGS ;; Expect an incompatible change with support for character i/o as well as literal. ;; Maybe should inhibit find-file-hook and so on? ;; Untested with remote file systems. ;; TODO ;; ;; support :direction 'output, might need ;; kill-buffer-hook ;; buffer-save-without-query ;; write-file-functions ;; ;; Process streams and emacsclient pipes ;; Maybe hack read-from-minibuffer keymap so user can signal EoF ;; Maybe add more optional or rest or keyword args for read-from-minibuffer ;; Maybe be smart about buffer size if emacs lisp can measure available memory ;; Confirm the uselessness of the not quite documented mystery parameter recursive-p (eval-when-compile (require 'cl) (unless (fboundp 'substring-no-properties) (defalias 'substring-no-properties 'substring)) (unless (fboundp 'buffer-local-value) (defun buffer-local-value (variable buffer) (save-current-buffer (set-buffer buffer) (symbol-value variable))))) (defvar cl-buffer-default-size 32768 "Suggested file buffer size in octets. When a buffer gets bigger than this, consider shrinking it.") (defvar cl-stream-hash (make-hash-table :test 'eq :weakness 'key) "Hash of cl streams objects to identify them as such. Key is byte-compiled cl stream object, value is t. Weak key so gc removes unused streams.") (defvar cl-buffer-hash (make-hash-table :test 'eq :weakness 'value) "Map buffers to cl streams for `cl-buffer-recycle'. Key is buffer, value is byte-compiled cl stream object. Weak value so gc removes unused streams.") (add-hook 'post-gc-hook 'cl-buffer-recycle) (defun cl-buffer-recycle () "Kill the buffers of dead streams. Run by `post-gc-hook' so dead streams gone from `cl-buffer-hash' and gc disabled." (let ((inhibit-changing-match-data t)) (dolist (buffer (buffer-list)) (and (eq (buffer-local-value 'major-mode buffer) 'cl-stream-buffer) ;; (string-match "^ \\*\\(In\\|Out\\)put .*\\*\\(<[0-9]+>\\)?$" (buffer-name buffer)) (not (gethash buffer cl-buffer-hash)) (kill-buffer buffer))))) (defmacro* cl-with-open-stream ((stream object) &body body) "Bind STREAM to an already open stream OBJECT and execute BODY like `progn'. Automatically close the stream when done, regardless of normal or abnormal exit. Abnormal exit while writing a new output file deletes it, except no output stream support yet. TODO ** make sure this works like in cl **" (declare (indent defun) (debug ((symbolp form &rest [form form]) body))) (let ((abort (make-symbol "abort"))) `(let ((,stream ,object) (,abort t)) (unwind-protect (prog1 (progn .,body) (setq ,abort nil)) (when ,stream (cl-close ,stream :abort ,abort)))))) (defmacro* cl-with-open-file ((stream file . options) &body body) "Bind STREAM to a newly created stream opened to FILE with options as in `cl-open' and execute BODY like `progn'. Automatically close the file when done, regardless of normal or abnormal exit. Abnormal exit while writing a new output file deletes it, except no output stream support yet." (declare (indent defun) (debug ((symbolp form &rest [form form]) body))) (let ((abort (make-symbol "abort"))) `(let ((,stream (cl-open ,file .,options)) (,abort t)) (unwind-protect (prog1 (progn .,body) (setq ,abort nil)) (when ,stream (cl-close ,stream :abort ,abort)))))) (defun* cl-open (file &key (direction :input) (buffer-size cl-buffer-default-size) (element-type 'character) (external-format :default) (if-exists :error) (if-does-not-exist (cond ((eq direction :input) :error) ((and (memq direction '(:output :io)) (not (memq if-exists '(:overwrite :append)))) :create) ;; ((eq direction :probe) nil) (t nil)))) "Create and return a newly opened file stream to FILE. No optional keyword argument support yet except IF-DOES-NOT-EXIST nil to return nil if fail (no error) and BUFFER-SIZE n which is a debug hack. DIRECTION defaults to :input, no :output, :io nor :probe support yet. ELEMENT-TYPE defaults to character, no signed-byte, unsigned-byte nor type specfier support yet. IF-EXISTS is ignored, no :error, :new-version, :rename, :rename-and-delete, :overwrite, :append, :supersede, nor nil support yet. IF-DOES-NOT-EXIST defaults to :error, nil to return nil instead of erring, no :create support yet. Default to :error for DIRECTION :input or IF-EXISTS :overwrite or :append; default to :create for DIRECTION :output or :io, unless IF-EXISTS :overwrite or :append; default to nil for DIRECTION :probe. EXTERNAL-FORMAT defaults to :default for raw octets treated as characters. No file-coding-system support yet." (unless (and (eq direction :input) (eq element-type 'character) (eq if-does-not-exist :error) (eq external-format :default)) (error "cl-open keyword args not supported yet.")) (case direction (:input (case if-does-not-exist ((nil) (condition-case nil (cl-input-stream-open) (file-error nil))) (:error (cl-input-stream-open)) (t (error "open :direction ':input :if-does-not-exist %S clash" if-does-not-exist)))) (:probe) (:output) (:io))) (defun cl-input-stream-open () ;; Shamelessly use dynamic bindings as parameters. (defvar file) ;; (defvar element-type) ;; (defvar external-format) (defvar buffer-size) ;; Shamelessly use buffer local bindings as closure state. (defvar cl-buffer-file-name) (defvar cl-buffer-size) (defvar cl-stream-position-offset) (defvar cl-stream) ;; Shamelessly cons a lambda to close over the buffer. (with-current-buffer (get-buffer-create (generate-new-buffer-name (format " *Input %s*" (file-name-nondirectory file)))) (set (make-local-variable 'cl-buffer-file-name) (condition-case err (car (insert-file-contents-literally file nil 0 buffer-size)) (error (kill-buffer (current-buffer)) (signal (car err) (cdr err))))) (set (make-local-variable 'cl-buffer-size) buffer-size) (set (make-local-variable 'cl-stream-position-offset) 0) (setq major-mode 'cl-stream-buffer list-buffers-directory cl-buffer-file-name buffer-read-only t buffer-undo-list t) (set-buffer-modified-p nil) (let ((cl-stream (byte-compile `(lambda (&optional method &rest args) (apply #'cl-input-stream ,(current-buffer) method args))))) (puthash (current-buffer) cl-stream cl-buffer-hash) (puthash cl-stream t cl-stream-hash) cl-stream))) (defun cl-input-stream (buffer &optional method &rest args) "Implement file input stream functions closed over a BUFFER with a METHOD argument and optional ARGS which are method-specific. Documented Emacs stream function protocol: (stream) read and return a character examples show eof returning nil (stream character) unread character tradition allows no more than a single unread between reads We extend the protocol: (stream 'read-line) read and return a line or nil on eof (stream 'read-line t) read and return a line or signal an error on eof (stream 'read-line nil x) read and return a line or return x on eof (stream 'close) close the stream (stream 'close t) abort the not yet implemented output stream Future methods may include read-delimited, read-regexp, write, etc." ;; Shamelessly use current buffer locals as closure state; ;; our methods should declare method vars to placate compiler. ;; (defvar cl-buffer-size) ;; (defvar cl-stream-position-offset) (assert (eq (buffer-local-value 'major-mode buffer) 'cl-stream-buffer)) (with-current-buffer buffer (cond ((null method) ; tyi (when (eobp) (cl-input-buffer-read (1- (buffer-size)))) (if (eobp) nil (forward-char) (preceding-char))) ((integerp method) ; untyi (backward-char)) ((eq method 'close) ; close (apply #'cl-input-stream-close args)) ((eq method 'read-line) ; read-line (apply #'cl-input-stream-read-line args)) (t (error "unknown input method %S" method))))) (defun* cl-input-stream-close (&key abort) "Implement file input stream close method." (declare (ignore abort)) ; only useful for output streams (kill-buffer (current-buffer))) (defun* cl-input-stream-read-line (&optional eof-error-p eof-value) "Implement file input stream read-line method." (and (eobp) (zerop (cl-input-buffer-read (buffer-size))) (if eof-error-p (signal 'end-of-file nil) (return-from cl-input-stream-read-line eof-value))) (let* ((bol (point)) (eol (if (search-forward "\n" nil 0) 1 (while (and (plusp (cl-input-buffer-read 0)) (not (search-forward "\n" nil 0)))) (if (= (preceding-char) ?\n) 1 0))) (line (buffer-substring-no-properties bol (- (point) eol)))) (cl-input-buffer-shrink (- (point) bol)) line)) (defun cl-input-buffer-shrink (n) "Delete N bytes of old input from the input stream buffer." ;; maybe only shrink when memory-full ;; dynamic buffer local as closure state or method vars (defvar cl-stream-position-offset) (assert (< n (point))) (incf cl-stream-position-offset n) (let ((buffer-read-only nil)) (delete-region 1 (1+ n))) (set-buffer-modified-p nil)) (defun cl-input-buffer-read (n) "Read new input into the buffer after deleting N bytes of old input. Returns number of new bytes read in." ;; dynamic buffer local as closure state or method vars (defvar cl-buffer-file-name) (defvar cl-buffer-size) (defvar cl-stream-position-offset) (assert (eobp)) (cl-input-buffer-shrink n) (let ((buffer-read-only nil) (end (+ cl-stream-position-offset (buffer-size)))) (prog1 (cadr (insert-file-contents-literally cl-buffer-file-name nil end (+ end cl-buffer-size))) (set-buffer-modified-p nil)))) (defun* cl-close (stream &key abort) "Like close in Common Lisp." (if (and (byte-code-function-p stream) (gethash stream cl-stream-hash)) (funcall stream 'close :abort abort) t)) (defun* cl-read-line (&optional input-stream (eof-error-p t) eof-value recursive-p (prompt "Line: ")) "Read a line and return as a string without the newline. Optional arguments: INPUT-STREAM a stream created by `cl-open' - q.v. nil or omitted - default to `standard-input' t - interactive input by `read-from-minibuffer' a buffer - read and advance point to next line a marker - read and advance marker to next line a string - read from beginning - not very useful because there's no pointer to advance a function - call with no arguments to read a character; repeat until the function returns a newline character or signals end of file, i.e., (signal 'end-of-file nil) An input stream which signals end of file should be prepared to signal it again because it likely to be read again, especially when the last character is not newline. EOF-ERROR-P non-nil or omitted - signal error at end of file nil - return EOF-VALUE at end of file A non-empty line which ends with end of file is treated as if it ends with a newline. EOF-VALUE defaults to nil. RECURSIVE-P ignored - for Common Lisp compatibility. PROMPT defaults to \"Line: \" - for interactive input." (cond ((gethash input-stream cl-stream-hash) ; cl-io stream (funcall input-stream 'read-line eof-error-p eof-value)) ((bufferp input-stream) ; buffer (with-current-buffer input-stream (if (eobp) (if eof-error-p (signal 'end-of-file nil) eof-value) (let* ((inhibit-changing-match-data t) (start (point)) (end (search-forward "\n" nil 0))) (buffer-substring-no-properties start (- (point) (if end 1 0))))))) ((markerp input-stream) ; marker (with-current-buffer (marker-buffer input-stream) (if (>= input-stream (point-max)) (if eof-error-p (signal 'end-of-file nil) eof-value) (save-excursion (goto-char input-stream) (let* ((inhibit-changing-match-data t) (end (search-forward "\n" nil 0))) (prog1 (buffer-substring-no-properties input-stream (- (point) (if end 1 0))) (set-marker input-stream (point)))))))) ((stringp input-stream) ; string (if (string= "" input-stream) (if eof-error-p (signal 'end-of-file nil) eof-value) ;; Maybe later implement with-input-from-string (let* ((inhibit-changing-match-data t) (end (string-match "\n" input-stream))) (substring-no-properties input-stream 0 (and end (1- end)))))) ((functionp input-stream) ; function (let (cl-read-line) (condition-case err (progn (setq cl-read-line (list (funcall input-stream))) (while (/= ?\n (car cl-read-line)) (setq cl-read-line (cons (funcall input-stream) cl-read-line))) (concat (nreverse (cdr cl-read-line)))) (end-of-file (if (null cl-read-line) (if eof-error-p (signal (car err) (cdr err)) eof-value) (concat (nreverse cl-read-line))))))) ((eq input-stream t) ; t (condition-case err (read-from-minibuffer prompt) (error (if (equal err '(error "Error reading from stdin")) (if eof-error-p (signal 'end-of-file nil) eof-value) (signal (car err) (cdr err)))))) ((null input-stream) ; nil (if (null standard-input) (error "null standard-input") (cl-read-line standard-input eof-error-p eof-value recursive-p prompt))) (t (error "invalid stream %S" input-stream)))) (dolist (alias '((open cl-open) (close cl-close) (read-line cl-read-line) (with-open-stream cl-with-open-stream (lisp-indent-function defun) (edebug-form-spec ((symbolp form &rest [form form]) body))) (with-open-file cl-with-open-file (lisp-indent-function defun) (edebug-form-spec ((symbolp form &rest [form form]) body))))) (let ((new (car alias)) (old (cadr alias)) (plist (cddr alias))) (defalias new old) ;; pre emacs 23 defmacro* declare compatibility (dolist (pair plist) (let ((name (car pair)) (value (cadr pair))) (put old name value) (put new name value))))) (eval-when-compile (dont-compile (or (boundp 'cl-self-test) (message "Self Test cl-io %s" (condition-case err (let ((cl-self-test t) (load-path (cons "." load-path))) (load "cl-io.el") (with-open-file (i "cl-io.el") (assert (and (string-match "^;;; cl-io.el\\>" (read-line i)) (eq 'eval-when-compile (car (read i)))))) "PASS") (error (format "FAIL %S" err))))))) (provide 'cl-io) ;;; end cl-io.el