# webster-word setup
;;; Fixes HTML from m-w.com by removing all instances of the
;;; "\240" character, which shows up as garbage on START,
;;; replacing all sense_break tags with line-break tags
;;; (presumably <br>), and adding line breaks before every
;;; verb_class tag.
(define (fix-definition-body html-str)
  ;; Cleans up the definition HTML in HTML-STR through a chain of
  ;; `substitute' rewrites, optionally re-trims the result using
  ;; `match' and `extract-html-element', and returns whatever
  ;; `remove-defs-variant' produces for the cleaned string.
  ;;
  ;; NOTE(review): several string literals below are empty, a single
  ;; space, or a bare newline; they look as if HTML markup has been
  ;; lost from them.  They are kept exactly as found -- confirm
  ;; against the upstream copy of this file.
  (let* (;; Presumably (substitute string old new), so this deletes
         ;; every "\x09" -- TODO confirm the argument order.
         ;; NOTE(review): the comment above this definition mentions
         ;; "\240", not "\x09"; verify which character is intended.
         (char-fixed (substitute html-str "\x09" ""))
         (char-and-span-sense-fixed
          (substitute char-fixed "" " "))
         (char-and-all-span-fixed
          (substitute char-and-span-sense-fixed
                      ""
                      " "))
         (junk-removed (substitute char-and-all-span-fixed
                                   " "
                                   ""))
         ;; NOTE(review): both alternatives of this `or' are textually
         ;; identical as written; presumably they once matched two
         ;; different markers (see the comment below about the
         ;; britannica-entry div and the "Rhymes with" section).
         (raw-trim (or (match "
" junk-removed ':prefix)
                       (match "
" junk-removed ':prefix)))
         (trimmed (and raw-trim
                       ;; When we trim, we remove lots of closing
                       ;; tags (presumably </div>s).
                       ;; Put them all back on, plus some for good
                       ;; measure, and wrap an extra div element and
                       ;; use extract-html-element to trim the extras
                       ;; off again.  (The remaining wrapped div will
                       ;; do no harm.)  The extra closing tags give us
                       ;; a little wiggle room for the site format to
                       ;; change, or for trimming from the
                       ;; britannica-entry div to be different from
                       ;; trimming from the "Rhymes with" section.
                       (extract-html-element
                        (string-append
                         "
" raw-trim
                         "
")
                        "div"))))
    ;; If no trimmed version could be produced (TRIMMED is false),
    ;; fall back to the untrimmed, junk-free string.
    (remove-defs-variant (or trimmed junk-removed))))
;;; Removes all defs-variant tags (presumably <div> elements) without
;;; removing their content.
(define (remove-defs-variant html-str)
(define (remove-defs-variant-helper new-str rest-str)
(let* ((div-match (match "