about summary refs log tree commit diff
path: root/fun/amsterdump/scrape.el
diff options
context:
space:
mode:
Diffstat (limited to 'fun/amsterdump/scrape.el')
-rw-r--r-- fun/amsterdump/scrape.el 25
1 files changed, 0 insertions, 25 deletions
diff --git a/fun/amsterdump/scrape.el b/fun/amsterdump/scrape.el
deleted file mode 100644
index f5537c2c8f..0000000000
--- a/fun/amsterdump/scrape.el
+++ /dev/null
@@ -1,25 +0,0 @@
-;; Scraping funda.nl (this file is just notes and snippets, not full code)
-;;
-;; Begin by copying the whole page into a buffer (from inspect element,
-;; because encoding is difficult)
-
-(beginning-of-buffer)
-
-;; zap everything that isn't a relevant result
-(keep-lines "data-object-url-tracking\\|img alt")
-
-;; mark all spans, move them to the end of the buffer
-(cl-letf (((symbol-function 'read-regexp)
-           (lambda (&rest _) "</span>")))
-  (mc/mark-all-in-region-regexp (point-min) (point-max)))
-
-;; mark all image lines (these contain street addresses for listings
-;; with images), clean up and join each with the previous line
-;;
-;; mark all: data-image-error-fallback
-
-;; delete all lines that don't either contain a span or an img tag
-;; (there are duplicates)
-(keep-lines "span class\\|img alt")
-
-;; do some manual cleanup of the hrefs, and done