about summary refs log tree commit diff
path: root/fun/amsterdump/scrape.el
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2021-03-24T00·17+0200
committer tazjin <mail@tazj.in> 2021-03-24T00·20+0000
commit 63e1706096f825f369c016d2827f366e1dde7a35 (patch)
tree f6b33936c3f49c28bcebd5cdc98d4fd4f63cbb4d /fun/amsterdump/scrape.el
parent aaaadbbcdc8234a5cdafa9591b823acddb3897da (diff)
chore(amsterdump): Remove project r/2326
This has looong outlived its purpose.

Change-Id: I66cff438dd05f0113b4a9a9bf9023d0f72238c9f
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2651
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
Diffstat (limited to 'fun/amsterdump/scrape.el')
-rw-r--r-- fun/amsterdump/scrape.el 25
1 files changed, 0 insertions, 25 deletions
diff --git a/fun/amsterdump/scrape.el b/fun/amsterdump/scrape.el
deleted file mode 100644
index f5537c2c8f17..000000000000
--- a/fun/amsterdump/scrape.el
+++ /dev/null
@@ -1,25 +0,0 @@
-;; Scraping funda.nl (this file is just notes and snippets, not full code)
-;;
-;; Begin by copying whole page into buffer (out of inspect element
-;; because encoding is difficult)
-
-(beginning-of-buffer)
-
-;; zap everything that isn't a relevant result
-(keep-lines "data-object-url-tracking\\|img alt")
-
-;; mark all spans, move them to the end of the buffer
-(cl-letf (((symbol-function 'read-regexp)
-           (lambda (&rest _) "</span>")))
-  (mc/mark-all-in-region-regexp (point-min) (point-max)))
-
-;; mark all images lines (these contain street addresses for things
-;; with images), clear up and join with previous
-;;
-;; mark all: data-image-error-fallback
-
-;; delete all lines that don't either contain a span or an img tag
-;; (there are duplicates)
-(keep-lines "span class\\|img alt")
-
-;; do some manual cleanup from the hrefs and done