# webster-word setup ;;; Fixes HTML from m-w.com by removing all instances of the ;;; "\240" character, which shows up as garbage on START, ;;; replacing all sense_break tags with
tags, and ;;; adding
s in before every verb_class tag. (define (fix-definition-body html-str) (let* ((char-fixed (substitute html-str "\x09" "")) (char-and-span-sense-fixed (substitute char-fixed "" "
")) (char-and-all-span-fixed (substitute char-and-span-sense-fixed "" "
")) (junk-removed (substitute char-and-all-span-fixed "
" "")) (raw-trim (or (match "
" junk-removed ':prefix) (match "
" junk-removed ':prefix))) (trimmed (and raw-trim ;; When we trim, we remove lots of
s. ;; Put them all back on, plus some for good ;; measure, and wrap an extra div element and ;; use extract-html-element to trim the extras ;; off again. (The remaining wrapped div will ;; do no harm.) The extra
s give us a ;; little wiggle room for the site format to ;; change, or for trimming from the ;; britannica-entry div to be different from ;; trimming from the "Rhymes with" section. (extract-html-element (string-append "
" raw-trim "
") "div")))) (remove-defs-variant (or trimmed junk-removed)) )) ;;; Removes all
tags without removing their ;;; content. (define (remove-defs-variant html-str) (define (remove-defs-variant-helper new-str rest-str) (let* ((div-match (match "
" rest-str))) (if (not div-match) (string-append new-str rest-str) (let* ((rest-text (match:suffix div-match)) (end-match (match "
" rest-text))) (remove-defs-variant-helper (string-append new-str (match:prefix div-match) (make-shared-substring rest-text 0 (match:start end-match))) (match:suffix end-match)))))) (remove-defs-variant-helper "" html-str)) # webster-word DEFINITION-PAGE (lambda (symbol) (define (found-definition str) (not (string-match "isn't in the dictionary" str))) (let* ((page (get-page (get "webster-word" symbol "URL"))) (fragment (and (found-definition page) (all-till-next-title symbol page 0)))) (cond (fragment (set! fragment (fix-definition-body (or (match "" fragment ':suffix) fragment))) (let* ((stylesheet-m (match "]*>" page)) (stylesheets-start (and stylesheet-m (match:start stylesheet-m))) (stylesheets-end #f)) (while stylesheet-m (set! stylesheets-end (match:end stylesheet-m)) (set! stylesheet-m (match "]*>" page stylesheets-end))) (qualify-relative-urls (get "webster-word" symbol "SITE-URL") (if stylesheets-start (string-append (substring page stylesheets-start stylesheets-end) fragment) fragment)))) (else #f)))) # webster-word DEFINITION-PAGE-TERSE (lambda (symbol) (let ((full-text (get "webster-word" symbol "DEFINITION-PAGE"))) (match '("" i) ;; Word appears in upper case in Webster in this context. (all-till-next-title (string-append "Definition of " (string-upcase symbol) "") full-text 0) ':suffix))) # webster-word SITE-URL (lambda (symbol) "http://www.merriam-webster.com") # webster-word URL ; updated 25 Oct 2008, jesstess (lambda (symbol) (let* ((symbol (substitute symbol " " "+")) (base "http://www.merriam-webster.com/dictionary/") (fullpath (string-append base symbol))) fullpath))