;;; disassemble.lisp - disassemble + arrows for jumps ;;; Copyright (c) 2016 Devon Sean McCullough ;;; Licensed under the GNU GPL ;;; Example: (disassemble* #'-) ;;; TO DO: ;;; ASDFize ;;; Horizontal lines too short, should meet end of original line. ;;; CCL address comments seem inconsistent with labels and not lined up? ;;; SBCL jumps to unconditional jumps, maybe re-route to final target? ;;; Fix comment/topology clashes. ;;; Fix inbound/outbound arc clashes. ;;; Visually break up eye-boggling multi-lane busses of long arcs. (eval-when (:compile-toplevel :load-toplevel :execute) (require '#:blocks) (require '#:cl-ppcre)) (in-package :blocks) (shadow '#:disassemble) (export 'disassemble) (defconstant +newline-as-string+ (make-string 1 :initial-element #\NewLine) "A newline as a string.") (defun disassemble (function) "Like `disassemble' decorated with jump arrows." ;; TEXT is the `disassemble' output as a string. ;; LINES is the text as a list of strings. ;; TEXT-WIDTH & TEXT-HEIGHT are the text bounding box size in characters. ;; DECORATION-LEFT & DECORATION-RIGHT bound the arrow-drawing space between the code and comment columns. ;; CODE is a list of expressions parsed from the text. ;; Each expression is a plist of :tag, :op, :source-text & :source-y where ;; :tag is the optional label as a symbol ;; :op is the machine instruction as a list ;; :source-text is the line(s) as a string ;; :source-y is the leading line number, counting from zero. ;; TAGS, DEFS, REFS & COLUMNS are a sequence of 4-tuples ;; represented as four individual sequences: ;; TAGS is a list of jump target labels as symbols. ;; DEFS is a list of the line numbers which define the tags. ;; REFS is a list of REF elements as lists; ;; each REF is a list of the line numbers, ordered least to greatest, which refer to or define the tag. ;; COLUMNS is a list of relative x-coordinates; ;; each COLUMN dedicates a vertical (│) drawing space to the tag. ;; For each tag draw a branching route of arcs (─) connecting the ref lines: ;; Draw the first arc up and left with a northeast (┐) box corner. ;; Draw any middle arcs down and left with southeast (┘) box corners, then redraw with (┤) tee joins. ;; Draw the last arc down and left with a southeast (┘) box corner. ;; Tip the defining arc with a left arrow (←) pointing at the tag definition. (let* ((text (with-output-to-string (*standard-output*) (cl:disassemble function))) (lines (cl-ppcre:split +newline-as-string+ text)) (text-width (reduce #'max lines :key #'length)) (text-height (length lines)) (code (parse-disassembly lines)) (decoration-left (pop code)) #-CCL (decoration-right (pop code)) (taggy-code (remove-if-not (lambda (expression) "Is plist EXPRESSION tagged?" (symbolp (getf expression :tag 0))) code)) (tags (mapcar #'cadr taggy-code)) (defs (mapcar (lambda (expression) (getf expression :source-y)) taggy-code)) (refs (xref tags code)) (columns (untangle refs))) (with-output-to-block (out :width (max text-width (+ decoration-left 1 (length columns))) :height text-height) (loop for column in columns for def in defs for ref in refs do (let* ((x0 decoration-left) (x (+ x0 column 1)) (y (first ref))) (move-to out x (1+ y)) (dolist (y ref) (line-to out x y) (line-to out x0 y) (princ (if (= def y) "←" "─") out) (move-to out x y)))) (move-to out 0 0) #+CCL (princ text out) #-CCL (loop for expression in code for source-text = (getf expression :source-text) for source-y = (getf expression :source-y) for comment-x = (getf expression :comment-x) do (if comment-x (progn (write-line (subseq source-text 0 decoration-left) out) (move-to out decoration-right source-y) (write-line (subseq source-text comment-x) out)) (write-line source-text out)))))) (defun xref (tags code) "Cross reference TAGS in CODE." (labels ((occurs-in-tree-p (element tree) "Does ELEMENT occur in TREE?" (or (eq element tree) (and (consp tree) (or (occurs-in-tree-p element (car tree)) (occurs-in-tree-p element (cdr tree)))))) (line-number (expression) "Get EXPRESSION source line number." (getf expression :source-y))) (mapcar (lambda (tag) (mapcar #'line-number (remove-if-not (lambda (expression) (occurs-in-tree-p tag expression)) code))) tags))) (defun untangle (refs) "Shuffle columns to avoid crossed routes." ;; TO DO: Better heuristics & metrics, maybe even exhaustive search? ;; To each TAG, dedicate a column for the route connecting its occurrences. ;; Return a column ordering which hopefully has fewer crossed routes ;; than the identity ordering, according to these simple heuristics: ;; Short routes to the left, long routes to the right. ;; Normally the +1 weight makes short routes < long routes. ;; The -4 weight makes a three-ended route < a slightly shorter two-ended route, ;; often crossed in CCL code, overriding the +1 weight in such cases. (let* ((identity (loop for i upto (1- (length refs)) collect i)) (permutation (sort (copy-list identity) #'< :key (lambda (i) "Short and bushy to the left, long and sparse to the right." (flet ((bushy (x) (* -4 (length x))) (long (x) (* +1 (- (first (last x)) (first x))))) (let ((ref (elt refs i))) (+ (bushy ref) (long ref)))))))) (mapcar (lambda (i) (position i permutation)) identity))) #+CCL (defun parse-disassembly (lines) "Parse LINES of Clozure Common Lisp compiler output. Return (DECORATION-LEFT DECORATION-RIGHT . CODE) as a dotted list." ;; [*] Kludge -- if present, :tag *must* be first, to allow NIL tags ;; Bug lurking: Should CCL disassemble output ever interpose ;; comment lines between a tag and its expression ;; this parse would rearrange those parts ;; but the CCL version ignores :source-text anyway. (list* (reduce #'max lines :key (lambda (line) (if (ppcre:scan "^;" line) 0 (length line)))) nil (remove-if #'null (loop with tag = nil for y by 1 for line in lines collect (multiple-value-bind (element err) (ignore-errors (read-from-string line)) (cond ((typep err 'error) (list :source-text line :source-y y)) ((atom element) (assert (null tag) () "Extra tag ~S" tag) (setq tag (list :tag element :source-text line :source-y y)) nil) (tag (prog1 (list :tag (getf tag :tag) :op element :source-text (concatenate 'string (getf tag :source-text) +newline-as-string+ line) :source-y (getf tag :source-y)) (setq tag nil))) (t (list :op element :source-text line :source-y y)))))))) #+SBCL (defun parse-disassembly (lines) "Parse LINES of Steel Bank Common Lisp compiler output. Return (DECORATION-LEFT DECORATION-RIGHT . CODE) as a dotted list." ;; [*] Kludge -- if present, :tag *must* be first, to allow NIL tags (let* ((decoration-left 0) (decoration-right nil) (code (loop with regexp = "^; [0-9A-F]+: *(?:(L[0-9]+): *)?([0-9A-F]+) *([^;]+?) *(;.*)?$" for y by 1 for line in lines collect (multiple-value-bind (match0 match1 reg0 reg1) (cl-ppcre:scan regexp line) (declare (ignore match1)) (if (null match0) (list :source-text line :source-y y) (destructuring-bind ((label0 hex0 op0 comment0) (label1 hex1 op1 comment1)) (list (coerce reg0 'list) (coerce reg1 'list)) (setq decoration-left (max decoration-left (1+ op1))) (when comment0 (setq decoration-right (if decoration-right (min decoration-right comment0) comment0))) (let ((label (and label0 (subseq line label0 label1))) (hex (subseq line hex0 hex1)) (op (with-input-from-string (in line :start op0 :end op1) (loop with eof = '#:eof for token = (read in nil eof) until (eq eof token) collect token do (when (eql #\, (peek-char t in nil)) (read-char in))))) (comment (and comment0 (subseq line comment0 comment1)))) (append (and label (list :tag (intern label))) (list :hex hex :op op) (and comment (list :comment comment :comment-x comment0)) (list :source-text line :source-y y))))))))) (list* decoration-left decoration-right code))) #-(or CCL SBCL) (error "~A unsupported." (lisp-implementation-type)) ;;; disassemble.lisp end