about summary refs log tree commit diff
path: root/users/Profpatsch
diff options
context:
space:
mode:
authorProfpatsch <mail@profpatsch.de>2023-05-30T23·59+0200
committerclbot <clbot@tvl.fyi>2023-07-13T23·03+0000
commitc2baefbecc22986338563775524d41bb2f522bf1 (patch)
tree321f9028efd3e49a2a7a6942ed91f27077d7da3d /users/Profpatsch
parent8c4730c433ba01cb17aab2917d495d055c4f468e (diff)
feat(users/Profpatsch): init jbovlaste sqlite r/6410
This is intended to convert the XML dump from
https://jbovlaste.lojban.org/ to an sqlite database at one point.

So far only XML parsing and some pretty printing

Change-Id: I48c989a3109c8d513c812703fa7a8f2689a157ee
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8687
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
Autosubmit: Profpatsch <mail@profpatsch.de>
Diffstat (limited to 'users/Profpatsch')
-rw-r--r--users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs110
-rw-r--r--users/Profpatsch/jbovlaste-sqlite/default.nix32
-rw-r--r--users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal71
3 files changed, 213 insertions, 0 deletions
diff --git a/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs b/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
new file mode 100644
index 0000000000..3d2811865a
--- /dev/null
+++ b/users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
@@ -0,0 +1,110 @@
+{-# LANGUAGE QuasiQuotes #-}
+
+module Main where
+
+import Data.ByteString.Internal qualified as Bytes
+import Data.Error.Tree
+import Data.List qualified as List
+import Data.Map.Strict qualified as Map
+import Data.Text qualified as Text
+import Data.Text.IO qualified as Text
+import Label
+import PossehlAnalyticsPrelude
+import Text.XML (def)
+import Text.XML qualified as Xml
+
+main :: IO ()
+main = do
+  f <- file
+  f.documentRoot
+    & filterElementsRec noUsers
+    & downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 20))
+    & toTree
+    & prettyErrorTree
+    & Text.putStrLn
+
+file :: IO Xml.Document
+file = Xml.readFile def "./jbovlaste-en.xml"
+
+-- | Filter XML elements recursively based on the given predicate
+filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element
+filterElementsRec f el =
+  el
+    { Xml.elementNodes =
+        mapMaybe
+          ( \case
+              Xml.NodeElement el' ->
+                if f el'
+                  then Just $ Xml.NodeElement $ filterElementsRec f el'
+                  else Nothing
+              other -> Just other
+          )
+          el.elementNodes
+    }
+
+-- | no <user> allowed
+noUsers :: Xml.Element -> Bool
+noUsers el = el.elementName.nameLocalName /= "user"
+
+downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element
+downTo n el =
+  if n.maxdepth > 0
+    then
+      el
+        { Xml.elementNodes =
+            ( do
+                let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes
+                if List.length eleven == (n.maxlistitems + 1)
+                  then eleven <> [Xml.NodeComment "snip!"]
+                  else eleven
+            )
+        }
+    else el {Xml.elementNodes = [Xml.NodeComment "snip!"]}
+  where
+    down =
+      \case
+        Xml.NodeElement el' ->
+          Xml.NodeElement $
+            downTo
+              ( T2
+                  (label @"maxdepth" $ n.maxdepth - 1)
+                  (label @"maxlistitems" n.maxlistitems)
+              )
+              el'
+        more -> more
+
+toTree :: Xml.Element -> ErrorTree
+toTree el = do
+  let outer =
+        if not $ null el.elementAttributes
+          then [fmt|<{name el.elementName}: {attrs el.elementAttributes}>|]
+          else [fmt|<{name el.elementName}>|]
+
+  case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of
+    Nothing -> singleError (newError outer)
+    Just (n :| []) | not $ isElementNode n -> singleError $ errorContext outer (nodeErrorNoElement n)
+    Just nodes -> nestedMultiError (newError outer) (nodes <&> node)
+  where
+    isEmptyContent = \case
+      Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8
+      _ -> False
+    isElementNode = \case
+      Xml.NodeElement _ -> True
+      _ -> False
+
+    node :: Xml.Node -> ErrorTree
+    node = \case
+      Xml.NodeElement el' -> toTree el'
+      other -> singleError $ nodeErrorNoElement other
+
+    nodeErrorNoElement :: Xml.Node -> Error
+    nodeErrorNoElement = \case
+      Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|]
+      Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|]
+      Xml.NodeComment c -> [fmt|<!-- {c} -->|]
+      Xml.NodeElement _ -> error "NodeElement not allowed here"
+
+    name :: Xml.Name -> Text
+    name n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|]
+    attrs :: Map Xml.Name Text -> Text
+    attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{name k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|]
diff --git a/users/Profpatsch/jbovlaste-sqlite/default.nix b/users/Profpatsch/jbovlaste-sqlite/default.nix
new file mode 100644
index 0000000000..cccd3d934e
--- /dev/null
+++ b/users/Profpatsch/jbovlaste-sqlite/default.nix
@@ -0,0 +1,32 @@
+{ depot, pkgs, lib, ... }:
+
+let
+  #   bins = depot.nix.getBins pkgs.sqlite ["sqlite3"];
+
+  jbovlaste-sqlite = pkgs.haskellPackages.mkDerivation {
+    pname = "jbovlaste-sqlite";
+    version = "0.1.0";
+
+    src = depot.users.Profpatsch.exactSource ./. [
+      ./jbovlaste-sqlite.cabal
+      ./JbovlasteSqlite.hs
+    ];
+
+    libraryHaskellDepends = [
+      pkgs.haskellPackages.pa-prelude
+      pkgs.haskellPackages.pa-label
+      pkgs.haskellPackages.pa-error-tree
+      pkgs.haskellPackages.sqlite-simple
+      pkgs.haskellPackages.xml-conduit
+      depot.users.Profpatsch.arglib.netencode.haskell
+      depot.users.Profpatsch.netencode.netencode-hs
+
+    ];
+
+    isExecutable = true;
+    isLibrary = false;
+    license = lib.licenses.mit;
+  };
+
+in
+jbovlaste-sqlite
diff --git a/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal b/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
new file mode 100644
index 0000000000..973009bda1
--- /dev/null
+++ b/users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
@@ -0,0 +1,71 @@
+cabal-version:      3.0
+name:               jbovlaste-sqlite
+version:            0.1.0.0
+author:             Profpatsch
+maintainer:         mail@profpatsch.de
+
+common common-options
+  ghc-options:
+      -Wall
+      -Wno-type-defaults
+      -Wunused-packages
+      -Wredundant-constraints
+      -fwarn-missing-deriving-strategies
+
+  -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html
+  -- for a description of all these extensions
+  default-extensions:
+      -- Infer Applicative instead of Monad where possible
+    ApplicativeDo
+
+    -- Allow literal strings to be Text
+    OverloadedStrings
+
+    -- Syntactic sugar improvements
+    LambdaCase
+    MultiWayIf
+
+    -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error
+    NoStarIsType
+
+    -- Convenient and crucial to deal with ambiguous field names, commonly
+    -- known as RecordDotSyntax
+    OverloadedRecordDot
+
+    -- does not export record fields as functions, use OverloadedRecordDot to access instead
+    NoFieldSelectors
+
+    -- Record punning
+    RecordWildCards
+
+    -- Improved Deriving
+    DerivingStrategies
+    DerivingVia
+
+    -- Type-level strings
+    DataKinds
+
+    -- to enable the `type` keyword in import lists (ormolu uses this automatically)
+    ExplicitNamespaces
+
+  default-language: GHC2021
+
+
+executable jbovlaste-sqlite
+    import: common-options
+
+    main-is:          JbovlasteSqlite.hs
+
+    build-depends:
+        base >=4.15 && <5,
+        pa-prelude,
+        pa-label,
+        pa-error-tree,
+        my-prelude,
+        containers,
+        bytestring,
+        arglib-netencode,
+        netencode,
+        text,
+        sqlite-simple,
+        xml-conduit,