From 63e1706096f825f369c016d2827f366e1dde7a35 Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Wed, 24 Mar 2021 02:17:45 +0200
Subject: chore(amsterdump): Remove project

This has looong outlived its purpose.

Change-Id: I66cff438dd05f0113b4a9a9bf9023d0f72238c9f
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2651
Reviewed-by: tazjin
Tested-by: BuildkiteCI
---
 fun/amsterdump/scrape.el | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 fun/amsterdump/scrape.el

(limited to 'fun/amsterdump/scrape.el')

diff --git a/fun/amsterdump/scrape.el b/fun/amsterdump/scrape.el
deleted file mode 100644
index f5537c2c8f17..000000000000
--- a/fun/amsterdump/scrape.el
+++ /dev/null
@@ -1,25 +0,0 @@
-;; Scraping funda.nl (this file is just notes and snippets, not full code)
-;;
-;; Begin by copying whole page into buffer (out of inspect element
-;; because encoding is difficult)
-
-(beginning-of-buffer)
-
-;; zap everything that isn't a relevant result
-(keep-lines "data-object-url-tracking\\|img alt")
-
-;; mark all spans, move them to the end of the buffer
-(cl-letf (((symbol-function 'read-regexp)
-           (lambda (&rest _) "")))
-  (mc/mark-all-in-region-regexp (point-min) (point-max)))
-
-;; mark all images lines (these contain street addresses for things
-;; with images), clear up and join with previous
-;;
-;; mark all: data-image-error-fallback
-
-;; delete all lines that don't either contain a span or an img tag
-;; (there are duplicates)
-(keep-lines "span class\\|img alt")
-
-;; do some manual cleanup from the hrefs and done
--
cgit 1.4.1