diff options
Diffstat (limited to 'fun/amsterdump/scrape.el')
-rw-r--r-- | fun/amsterdump/scrape.el | 25 |
1 files changed, 0 insertions, 25 deletions
diff --git a/fun/amsterdump/scrape.el b/fun/amsterdump/scrape.el
deleted file mode 100644
index f5537c2c8f17..000000000000
--- a/fun/amsterdump/scrape.el
+++ /dev/null
@@ -1,25 +0,0 @@
-;; Scraping funda.nl (this file is just notes and snippets, not full code)
-;;
-;; Begin by copying whole page into buffer (out of inspect element
-;; because encoding is difficult)
-
-(beginning-of-buffer)
-
-;; zap everything that isn't a relevant result
-(keep-lines "data-object-url-tracking\\|img alt")
-
-;; mark all spans, move them to the end of the buffer
-(cl-letf (((symbol-function 'read-regexp)
-           (lambda (&rest _) "</span>")))
-  (mc/mark-all-in-region-regexp (point-min) (point-max)))
-
-;; mark all images lines (these contain street addresses for things
-;; with images), clear up and join with previous
-;;
-;; mark all: data-image-error-fallback
-
-;; delete all lines that don't either contain a span or an img tag
-;; (there are duplicates)
-(keep-lines "span class\\|img alt")
-
-;; do some manual cleanup from the hrefs and done