From c2baefbecc22986338563775524d41bb2f522bf1 Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Wed, 31 May 2023 01:59:00 +0200 Subject: feat(users/Profpatsch): init jbovlaste sqlite This is intended to convert the XML dump from https://jbovlaste.lojban.org/ to an sqlite database at one point. So far only XML parsing and some pretty printing Change-Id: I48c989a3109c8d513c812703fa7a8f2689a157ee Reviewed-on: https://cl.tvl.fyi/c/depot/+/8687 Tested-by: BuildkiteCI Reviewed-by: Profpatsch Autosubmit: Profpatsch --- .../Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs | 110 +++++++++++++++++++++ users/Profpatsch/jbovlaste-sqlite/default.nix | 32 ++++++ .../jbovlaste-sqlite/jbovlaste-sqlite.cabal | 71 +++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs create mode 100644 users/Profpatsch/jbovlaste-sqlite/default.nix create mode 100644 users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal (limited to 'users') diff --git a/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs b/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs new file mode 100644 index 000000000000..3d2811865a77 --- /dev/null +++ b/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs @@ -0,0 +1,110 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Main where + +import Data.ByteString.Internal qualified as Bytes +import Data.Error.Tree +import Data.List qualified as List +import Data.Map.Strict qualified as Map +import Data.Text qualified as Text +import Data.Text.IO qualified as Text +import Label +import PossehlAnalyticsPrelude +import Text.XML (def) +import Text.XML qualified as Xml + +main :: IO () +main = do + f <- file + f.documentRoot + & filterElementsRec noUsers + & downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 20)) + & toTree + & prettyErrorTree + & Text.putStrLn + +file :: IO Xml.Document +file = Xml.readFile def "./jbovlaste-en.xml" + +-- | Filter XML elements recursively based on the given predicate +filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element +filterElementsRec f el = + el + { Xml.elementNodes = + mapMaybe + ( \case + Xml.NodeElement el' -> + if f el' + then Just $ Xml.NodeElement $ filterElementsRec f el' + else Nothing + other -> Just other + ) + el.elementNodes + } + +-- | no allowed +noUsers :: Xml.Element -> Bool +noUsers el = el.elementName.nameLocalName /= "user" + +downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element +downTo n el = + if n.maxdepth > 0 + then + el + { Xml.elementNodes = + ( do + let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes + if List.length eleven == (n.maxlistitems + 1) + then eleven <> [Xml.NodeComment "snip!"] + else eleven + ) + } + else el {Xml.elementNodes = [Xml.NodeComment "snip!"]} + where + down = + \case + Xml.NodeElement el' -> + Xml.NodeElement $ + downTo + ( T2 + (label @"maxdepth" $ n.maxdepth - 1) + (label @"maxlistitems" n.maxlistitems) + ) + el' + more -> more + +toTree :: Xml.Element -> ErrorTree +toTree el = do + let outer = + if not $ null el.elementAttributes + then [fmt|<{name el.elementName}: {attrs el.elementAttributes}>|] + else [fmt|<{name el.elementName}>|] + + case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of + Nothing -> singleError (newError outer) + Just (n :| []) | not $ isElementNode n -> singleError $ errorContext outer (nodeErrorNoElement n) + Just nodes -> nestedMultiError (newError outer) (nodes <&> node) + where + isEmptyContent = \case + Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8 + _ -> False + isElementNode = \case + Xml.NodeElement _ -> True + _ -> False + + node :: Xml.Node -> ErrorTree + node = \case + Xml.NodeElement el' -> toTree el' + other -> singleError $ nodeErrorNoElement other + + nodeErrorNoElement :: Xml.Node -> Error + nodeErrorNoElement = \case + Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|] + Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|] + Xml.NodeComment c -> [fmt||] + Xml.NodeElement _ -> error "NodeElement not allowed here" + + name :: Xml.Name -> Text + name n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|] + attrs :: Map Xml.Name Text -> Text + attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{name k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|] diff --git a/users/Profpatsch/jbovlaste-sqlite/default.nix b/users/Profpatsch/jbovlaste-sqlite/default.nix new file mode 100644 index 000000000000..cccd3d934e6d --- /dev/null +++ b/users/Profpatsch/jbovlaste-sqlite/default.nix @@ -0,0 +1,32 @@ +{ depot, pkgs, lib, ... }: + +let + # bins = depot.nix.getBins pkgs.sqlite ["sqlite3"]; + + jbovlaste-sqlite = pkgs.haskellPackages.mkDerivation { + pname = "jbovlaste-sqlite"; + version = "0.1.0"; + + src = depot.users.Profpatsch.exactSource ./. [ + ./jbovlaste-sqlite.cabal + ./JbovlasteSqlite.hs + ]; + + libraryHaskellDepends = [ + pkgs.haskellPackages.pa-prelude + pkgs.haskellPackages.pa-label + pkgs.haskellPackages.pa-error-tree + pkgs.haskellPackages.sqlite-simple + pkgs.haskellPackages.xml-conduit + depot.users.Profpatsch.arglib.netencode.haskell + depot.users.Profpatsch.netencode.netencode-hs + + ]; + + isExecutable = true; + isLibrary = false; + license = lib.licenses.mit; + }; + +in +jbovlaste-sqlite diff --git a/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal b/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal new file mode 100644 index 000000000000..973009bda1af --- /dev/null +++ b/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal @@ -0,0 +1,71 @@ +cabal-version: 3.0 +name: jbovlaste-sqlite +version: 0.1.0.0 +author: Profpatsch +maintainer: mail@profpatsch.de + +common common-options + ghc-options: + -Wall + -Wno-type-defaults + -Wunused-packages + -Wredundant-constraints + -fwarn-missing-deriving-strategies + + -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html + -- for a description of all these extensions + default-extensions: + -- Infer Applicative instead of Monad where possible + ApplicativeDo + + -- Allow literal strings to be Text + OverloadedStrings + + -- Syntactic sugar improvements + LambdaCase + MultiWayIf + + -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error + NoStarIsType + + -- Convenient and crucial to deal with ambiguous field names, commonly + -- known as RecordDotSyntax + OverloadedRecordDot + + -- does not export record fields as functions, use OverloadedRecordDot to access instead + NoFieldSelectors + + -- Record punning + RecordWildCards + + -- Improved Deriving + DerivingStrategies + DerivingVia + + -- Type-level strings + DataKinds + + -- to enable the `type` keyword in import lists (ormolu uses this automatically) + ExplicitNamespaces + + default-language: GHC2021 + + +executable jbovlaste-sqlite + import: common-options + + main-is: JbovlasteSqlite.hs + + build-depends: + base >=4.15 && <5, + pa-prelude, + pa-label, + pa-error-tree, + my-prelude, + containers, + bytestring, + arglib-netencode, + netencode, + text, + sqlite-simple, + xml-conduit, -- cgit 1.4.1