From 07b976ccd89cc8a4b5d0624ee2223d64add4615d Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Mon, 19 Jun 2023 15:56:32 +0200 Subject: feat(users/Profpatsch): init whatcd-resolver Change-Id: Ieb377fb8caa60e716703153dfeca5173f9a6779d Reviewed-on: https://cl.tvl.fyi/c/depot/+/8830 Reviewed-by: Profpatsch Tested-by: BuildkiteCI --- third_party/overlays/haskell/default.nix | 2 + .../haskell/extra-pkgs/pa-json-0.2.0.0.nix | 43 +++ .../haskell/extra-pkgs/pa-pretty-0.1.1.0.nix | 29 ++ users/Profpatsch/cabal.project | 1 + users/Profpatsch/hie.yaml | 2 + users/Profpatsch/shell.nix | 12 + users/Profpatsch/whatcd-resolver/build.ninja | 29 ++ users/Profpatsch/whatcd-resolver/notes.org | 48 +++ users/Profpatsch/whatcd-resolver/server-notes.org | 2 + .../whatcd-resolver/src/Postgres/Decoder.hs | 58 ++++ .../whatcd-resolver/src/Postgres/MonadPostgres.hs | 377 +++++++++++++++++++++ users/Profpatsch/whatcd-resolver/src/Tool.hs | 75 ++++ .../whatcd-resolver/src/ValidationParseT.hs | 15 + .../whatcd-resolver/src/WhatcdResolver.hs | 303 +++++++++++++++++ .../whatcd-resolver/whatcd-resolver.cabal | 90 +++++ 15 files changed, 1086 insertions(+) create mode 100644 third_party/overlays/haskell/extra-pkgs/pa-json-0.2.0.0.nix create mode 100644 third_party/overlays/haskell/extra-pkgs/pa-pretty-0.1.1.0.nix create mode 100644 users/Profpatsch/whatcd-resolver/build.ninja create mode 100644 users/Profpatsch/whatcd-resolver/notes.org create mode 100644 users/Profpatsch/whatcd-resolver/server-notes.org create mode 100644 users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs create mode 100644 users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs create mode 100644 users/Profpatsch/whatcd-resolver/src/Tool.hs create mode 100644 users/Profpatsch/whatcd-resolver/src/ValidationParseT.hs create mode 100644 users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs create mode 100644 users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal diff --git a/third_party/overlays/haskell/default.nix b/third_party/overlays/haskell/default.nix index ca3a1d29830c..9282d634143c 100644 --- a/third_party/overlays/haskell/default.nix +++ b/third_party/overlays/haskell/default.nix @@ -34,6 +34,8 @@ in pa-error-tree = hsSelf.callPackage ./extra-pkgs/pa-error-tree-0.1.0.0.nix { }; pa-field-parser = hsSelf.callPackage ./extra-pkgs/pa-field-parser-0.1.0.1.nix { }; pa-label = hsSelf.callPackage ./extra-pkgs/pa-label-0.1.0.1.nix { }; + pa-pretty = hsSelf.callPackage ./extra-pkgs/pa-pretty-0.1.1.0.nix { }; + pa-json = hsSelf.callPackage ./extra-pkgs/pa-json-0.2.0.0.nix { }; }; }; diff --git a/third_party/overlays/haskell/extra-pkgs/pa-json-0.2.0.0.nix b/third_party/overlays/haskell/extra-pkgs/pa-json-0.2.0.0.nix new file mode 100644 index 000000000000..56ee6eb2d10c --- /dev/null +++ b/third_party/overlays/haskell/extra-pkgs/pa-json-0.2.0.0.nix @@ -0,0 +1,43 @@ +{ mkDerivation +, aeson +, aeson-better-errors +, aeson-pretty +, base +, bytestring +, containers +, hspec-core +, hspec-expectations +, lib +, pa-error-tree +, pa-label +, pa-prelude +, scientific +, text +, time +, vector +}: +mkDerivation { + pname = "pa-json"; + version = "0.2.0.0"; + sha256 = "b57ef3888b8ea3230925675eccd6affbc3d296fc8762f5937435af4bdbd276e4"; + libraryHaskellDepends = [ + aeson + aeson-better-errors + aeson-pretty + base + bytestring + containers + hspec-core + hspec-expectations + pa-error-tree + pa-label + pa-prelude + scientific + text + time + vector + ]; + homepage = "https://github.com/possehl-analytics/pa-hackage"; + description = "Our JSON parsers/encoders"; + license = lib.licenses.bsd3; +} diff --git a/third_party/overlays/haskell/extra-pkgs/pa-pretty-0.1.1.0.nix b/third_party/overlays/haskell/extra-pkgs/pa-pretty-0.1.1.0.nix new file mode 100644 index 000000000000..d6dadef849a1 --- /dev/null +++ b/third_party/overlays/haskell/extra-pkgs/pa-pretty-0.1.1.0.nix @@ -0,0 +1,29 @@ +{ mkDerivation +, aeson +, aeson-pretty +, ansi-terminal +, base +, hscolour +, lib +, nicify-lib +, pa-prelude +, text +}: +mkDerivation { + pname = "pa-pretty"; + version = "0.1.1.0"; + sha256 = "da925a7cf2ac49c5769d7ebd08c2599b537efe45b3d506bf4d7c8673633ef6c9"; + libraryHaskellDepends = [ + aeson + aeson-pretty + ansi-terminal + base + hscolour + nicify-lib + pa-prelude + text + ]; + homepage = "https://github.com/possehl-analytics/pa-hackage"; + description = "Some pretty-printing helpers"; + license = lib.licenses.bsd3; +} diff --git a/users/Profpatsch/cabal.project b/users/Profpatsch/cabal.project index 900fd457df04..cf2d7b1366cd 100644 --- a/users/Profpatsch/cabal.project +++ b/users/Profpatsch/cabal.project @@ -6,3 +6,4 @@ packages: ./htmx-experiment/htmx-experiment.cabal ./cas-serve/cas-serve.cabal ./jbovlaste-sqlite/jbovlaste-sqlite.cabal + ./whatcd-resolver/whatcd-resolver.cabal diff --git a/users/Profpatsch/hie.yaml b/users/Profpatsch/hie.yaml index d11a60ac737c..cf5982d317ca 100644 --- a/users/Profpatsch/hie.yaml +++ b/users/Profpatsch/hie.yaml @@ -18,3 +18,5 @@ cradle: component: "cas-serve:exe:cas-serve" - path: "./jbovlaste-sqlite/JbovlasteSqlite.hs" component: "jbovlaste-sqlite:exe:jbovlaste-sqlite" + - path: "./whatcd-resolver/src" + component: "lib:whatcd-resolver" diff --git a/users/Profpatsch/shell.nix b/users/Profpatsch/shell.nix index 4f7ff53c3b2e..ea88c423cb0f 100644 --- a/users/Profpatsch/shell.nix +++ b/users/Profpatsch/shell.nix @@ -18,6 +18,8 @@ pkgs.mkShell { h.monad-logger h.pa-field-parser h.pa-label + h.pa-json + h.pa-pretty h.ihp-hsx h.PyF h.foldl @@ -46,13 +48,23 @@ pkgs.mkShell { h.nicify-lib h.hspec h.hspec-expectations-pretty-diff + h.tmp-postgres + h.postgresql-simple + h.resource-pool ])) pkgs.rustup pkgs.pkg-config pkgs.fuse + pkgs.postgresql ]; + WHATCD_RESOLVER_TOOLS = pkgs.linkFarm "whatcd-resolver-tools" [ + { + name = "pg_format"; + path = "${pkgs.pgformatter}/bin/pg_format"; + } + ]; RUSTC_WRAPPER = let diff --git a/users/Profpatsch/whatcd-resolver/build.ninja b/users/Profpatsch/whatcd-resolver/build.ninja new file mode 100644 index 000000000000..026f100a2e82 --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/build.ninja @@ -0,0 +1,29 @@ +builddir = .ninja + +rule cabal-run + command = cabal run $target + +rule cabal-repl + command = cabal repl $target + +rule cabal-test + command = cabal test $target + +rule hpack-file + description = hpack $in + command = $ + hpack --force $in $ + && touch $out + +build repl : cabal-repl | cabal-preconditions + target = whatcd-resolver-server + pool = console + +build run : cabal-run | cabal-preconditions + target = whatcd-resolver-server + pool = console + + +build cabal-preconditions : phony whatcd-resolver-server.cabal + +build whatcd-resolver-server.cabal : hpack-file package.yaml diff --git a/users/Profpatsch/whatcd-resolver/notes.org b/users/Profpatsch/whatcd-resolver/notes.org new file mode 100644 index 000000000000..24662c0f32a4 --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/notes.org @@ -0,0 +1,48 @@ +* The Glorious what.cd¹ Resolver + + ¹: At the time of writing, what.cd didn’t even exist anymore + +** Idea + + Stream your music (or media) from a private tracker transparently. + “Spotify for torrents” + +** Technical + + You need to have a seedbox, which runs a server program. + The server manages queries, downloads torrents and requested files, and + provides http streams to the downloaded files (while caching them for + seeding). + + Clients then use the API to search for music (e.g. query for artists or + tracks) and get back the promise of a stream to the resolved file (a bit how + resolvers in the Tomahawk Player work) + +*** The Server + +**** Resolving queries + + ~resolve :: Query -> IO Identifiers~ + + A query is a search input for content (could be an artist or a movie name + or something) + + There have to be multiple providers, depending on the site used + (e.g. one for Gazelle trackers, one for Piratebay) and some intermediate + structure (e.g. for going through Musicbrainz first). + + Output is a unique identifier for a fetchable resource; this could be a + link to a torrent combined with a file/directory in said torrent. + +**** Fetching Identifiers + + ~fetch :: Identifier -> IO (Promise Stream)~ + + Takes an Identifier (which should provide all information on how to grab + the media file and returns a stream to the media file once it’s ready. + + For torrents, this probably consists of telling the torrent + library/application to fetch a certain torrent and start downloading the + required files in it. The torrent fetcher would also need to do seeding and + space management, since one usually has to keep a ratio and hard drive + space is not unlimited. diff --git a/users/Profpatsch/whatcd-resolver/server-notes.org b/users/Profpatsch/whatcd-resolver/server-notes.org new file mode 100644 index 000000000000..cb990aba3dfb --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/server-notes.org @@ -0,0 +1,2 @@ +* whatcd-resolver-server + diff --git a/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs b/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs new file mode 100644 index 000000000000..2e7fcb8779ed --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs @@ -0,0 +1,58 @@ +module Postgres.Decoder where + +import Control.Applicative (Alternative) +import Data.Aeson qualified as Json +import Data.Aeson.BetterErrors qualified as Json +import Data.Error.Tree +import Data.Typeable (Typeable) +import Database.PostgreSQL.Simple.FromField qualified as PG +import Database.PostgreSQL.Simple.FromRow qualified as PG +import Json qualified +import Label +import PossehlAnalyticsPrelude + +-- | A Decoder of postgres values. Allows embedding more complex parsers (like a 'Json.ParseT'). +newtype Decoder a = Decoder (PG.RowParser a) + deriving newtype (Functor, Applicative, Alternative, Monad) + +-- | Turn any type that implements 'PG.fromField' into a 'Decoder'. Use type applications to prevent accidental conversions: +-- +-- @ +-- fromField @Text :: Decoder Text +-- @ +fromField :: PG.FromField a => Decoder a +fromField = Decoder $ PG.fieldWith PG.fromField + +-- | Turn any type that implements 'PG.fromField' into a 'Decoder' and wrap the result into the given 'Label'. Use type applications to prevent accidental conversions: +-- +-- @ +-- fromField @"myField" @Text :: Decoder (Label "myField" Text) +-- @ +fromFieldLabel :: forall lbl a. PG.FromField a => Decoder (Label lbl a) +fromFieldLabel = label @lbl <$> fromField + +-- | Parse fields out of a json value returned from the database. +-- +-- ATTN: The whole json record has to be transferred before it is parsed, +-- so if you only need a tiny bit of it, use `->` and `->>` in your SQL statement +-- and return only the fields you need from the query. +-- +-- In that case pay attention to NULL though: +-- +-- @ +-- SELECT '{"foo": {}}'::jsonb->>'foo' IS NULL +-- → TRUE +-- @ +-- +-- Also note: `->>` will coerce the json value to @text@, regardless of the content. +-- So the JSON object @{"foo": {}}"@ would be returned as the text: @"{\"foo\": {}}"@. +json :: Typeable a => Json.ParseT ErrorTree Identity a -> Decoder a +json parser = Decoder $ PG.fieldWith $ \field bytes -> do + val <- PG.fromField @Json.Value field bytes + case Json.parseValue parser val of + Left err -> + PG.returnError + PG.ConversionFailed + field + (err & Json.parseErrorTree "Cannot decode jsonb column" & prettyErrorTree & textToString) + Right a -> pure a diff --git a/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs b/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs new file mode 100644 index 000000000000..9911f260a2e9 --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs @@ -0,0 +1,377 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TypeFamilyDependencies #-} +{-# LANGUAGE UndecidableInstances #-} + +module Postgres.MonadPostgres where + +import Control.Exception +import Control.Monad.Except +import Control.Monad.Logger.CallStack +import Control.Monad.Reader (MonadReader (ask), ReaderT (..)) +import Data.Error.Tree +import Data.Int (Int64) +import Data.Kind (Type) +import Data.List qualified as List +import Data.Typeable (Typeable) +import Database.PostgreSQL.Simple (Connection, FormatError, FromRow, Query, QueryError, ResultError, SqlError, ToRow) +import Database.PostgreSQL.Simple qualified as PG +import Database.PostgreSQL.Simple.FromRow qualified as PG +import Label +import PossehlAnalyticsPrelude +import Postgres.Decoder +import Pretty (showPretty) +import System.Exit (ExitCode (..)) +import Tool +import UnliftIO (MonadUnliftIO (withRunInIO)) +import UnliftIO.Process qualified as Process + +-- | Postgres queries/commands that can be executed within a running transaction. +-- +-- These are implemented with the @postgresql-simple@ primitives of the same name +-- and will behave the same unless othewise documented. +class Monad m => MonadPostgres (m :: Type -> Type) where + -- | Execute an INSERT, UPDATE, or other SQL query that is not expected to return results. + + -- Returns the number of rows affected. + execute :: (ToRow params, Typeable params) => Query -> params -> Transaction m (Label "numberOfRowsAffected" Natural) + + -- | Execute a multi-row INSERT, UPDATE, or other SQL query that is not expected to return results. + -- + -- Returns the number of rows affected. If the list of parameters is empty, this function will simply return 0 without issuing the query to the backend. If this is not desired, consider using the 'PG.Values' constructor instead. + executeMany :: (ToRow params, Typeable params) => Query -> [params] -> Transaction m (Label "numberOfRowsAffected" Natural) + + -- | Execute INSERT ... RETURNING, UPDATE ... RETURNING, or other SQL query that accepts multi-row input and is expected to return results. Note that it is possible to write query conn "INSERT ... RETURNING ..." ... in cases where you are only inserting a single row, and do not need functionality analogous to 'executeMany'. + -- + -- If the list of parameters is empty, this function will simply return [] without issuing the query to the backend. If this is not desired, consider using the 'PG.Values' constructor instead. + executeManyReturning :: (ToRow q, FromRow r) => Query -> [q] -> Transaction m [r] + + -- | Run a query, passing parameters and result row parser. + queryWith :: (PG.ToRow params, Typeable params, Typeable r) => PG.Query -> params -> Decoder r -> Transaction m [r] + + -- | Run a query without any parameters and result row parser. + queryWith_ :: (Typeable r) => PG.Query -> Decoder r -> Transaction m [r] + + -- | Run a query, passing parameters, and fold over the resulting rows. + -- + -- This doesn’t have to realize the full list of results in memory, + -- rather results are streamed incrementally from the database. + -- + -- When dealing with small results, it may be simpler (and perhaps faster) to use query instead. + -- + -- This fold is _not_ strict. The stream consumer is responsible for forcing the evaluation of its result to avoid space leaks. + -- + -- If you can, prefer aggregating in the database itself. + foldRows :: + (FromRow row, ToRow params, Typeable row, Typeable params) => + Query -> + params -> + a -> + (a -> row -> Transaction m a) -> + Transaction m a + + -- | Run a given transaction in a transaction block, rolling back the transaction + -- if any exception (postgres or Haskell Exception) is thrown during execution. + -- + -- Re-throws the exception. + -- + -- Don’t do any long-running things on the Haskell side during a transaction, + -- because it will block a database connection and potentially also lock + -- database tables from being written or read by other clients. + -- + -- Nonetheless, try to push transactions as far out to the handlers as possible, + -- don’t do something like @runTransaction $ query …@, because it will lead people + -- to accidentally start nested transactions (the inner transaction is run on a new connections, + -- thus can’t see any changes done by the outer transaction). + -- Only handlers should run transactions. + runTransaction :: Transaction m a -> m a + +-- | Run a query, passing parameters. +query :: forall m params r. (PG.ToRow params, PG.FromRow r, Typeable params, Typeable r, MonadPostgres m) => PG.Query -> params -> Transaction m [r] +query qry params = queryWith qry params (Decoder PG.fromRow) + +-- | Run a query without any parameters. +query_ :: forall m r. (Typeable r, PG.FromRow r, MonadPostgres m) => PG.Query -> Transaction m [r] +query_ qry = queryWith_ qry (Decoder PG.fromRow) + +-- TODO: implement via fold, so that the result doesn’t have to be realized in memory +querySingleRow :: + ( MonadPostgres m, + ToRow qParams, + Typeable qParams, + FromRow a, + Typeable a, + MonadThrow m + ) => + Query -> + qParams -> + Transaction m a +querySingleRow qry params = do + query qry params >>= ensureSingleRow + +-- TODO: implement via fold, so that the result doesn’t have to be realized in memory +querySingleRowMaybe :: + ( MonadPostgres m, + ToRow qParams, + Typeable qParams, + FromRow a, + Typeable a, + MonadThrow m + ) => + Query -> + qParams -> + Transaction m (Maybe a) +querySingleRowMaybe qry params = do + rows <- query qry params + case rows of + [] -> pure Nothing + [one] -> pure (Just one) + -- TODO: Should we MonadThrow this here? It’s really an implementation detail of MonadPostgres + -- that a database function can error out, should probably handled by the instances. + more -> throwM $ SingleRowError {numberOfRowsReturned = (List.length more)} + +ensureSingleRow :: MonadThrow m => [a] -> m a +ensureSingleRow = \case + -- TODO: Should we MonadThrow this here? It’s really an implementation detail of MonadPostgres + -- that a database function can error out, should probably handled by the instances. + [] -> throwM (SingleRowError {numberOfRowsReturned = 0}) + [one] -> pure one + more -> + throwM $ + SingleRowError + { numberOfRowsReturned = + -- TODO: this is VERY bad, because it requires to parse the full database output, even if there’s 10000000000 elements + List.length more + } + +-- | A better `query` +-- +-- Parameters are passed first, +-- then a Proxy which you should annotate with the return type of the query. +-- This way it’s right before the @SELECT@, +-- meaning it’s easy to see whether the two correspond. +-- +-- TODO: maybe replace the query function in the class with this? +queryBetter :: + ( MonadPostgres m, + ToRow params, + FromRow res, + Typeable params, + Typeable res + ) => + params -> + Proxy res -> + Query -> + Transaction m [res] +queryBetter params Proxy q = query q params + +newtype Transaction m a = Transaction {unTransaction :: (ReaderT Connection m a)} + deriving newtype + ( Functor, + Applicative, + Monad, + MonadThrow, + MonadLogger, + MonadIO, + MonadUnliftIO, + MonadTrans + ) + +runTransaction' :: Connection -> Transaction m a -> m a +runTransaction' conn transaction = runReaderT transaction.unTransaction conn + +-- | Catch any Postgres exception that gets thrown, +-- print the query that was run and the query parameters, +-- then rethrow inside an 'Error'. +handlePGException :: + forall a params m. + (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => + Text -> + Query -> + -- | Depending on whether we used `format` or `formatMany`. + Either params [params] -> + IO a -> + Transaction m a +handlePGException queryType query' params io = do + withRunInIO $ \unliftIO -> + io + `catches` [ Handler $ unliftIO . logQueryException @SqlError, + Handler $ unliftIO . logQueryException @QueryError, + Handler $ unliftIO . logQueryException @ResultError, + Handler $ unliftIO . logFormatException + ] + where + -- TODO: use throwInternalError here (after pulling it into the MonadPostgres class) + throwAsError = unwrapIOError . Left . newError + throwErr err = liftIO $ throwAsError $ prettyErrorTree $ nestedMultiError "A Postgres query failed" err + logQueryException :: Exception e => e -> Transaction m a + logQueryException exc = do + formattedQuery <- case params of + Left one -> pgFormatQuery' query' one + Right many -> pgFormatQueryMany' query' many + throwErr + ( singleError [fmt|Query Type: {queryType}|] + :| [ nestedError "Exception" (exc & showPretty & newError & singleError), + nestedError "Query" (formattedQuery & newError & singleError) + ] + ) + logFormatException :: FormatError -> Transaction m a + logFormatException fe = throwErr (fe & showPretty & newError & singleError & singleton) + +pgExecute :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecute qry params = do + conn <- Transaction ask + PG.execute conn qry params + & handlePGException "execute" qry (Left params) + >>= toNumberOfRowsAffected "pgExecute" + +pgExecute_ :: (MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecute_ qry = do + conn <- Transaction ask + PG.execute_ conn qry + & handlePGException "execute_" qry (Left ()) + >>= toNumberOfRowsAffected "pgExecute_" + +pgExecuteMany :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecuteMany qry params = + do + conn <- Transaction ask + PG.executeMany conn qry params + & handlePGException "executeMany" qry (Right params) + >>= toNumberOfRowsAffected "pgExecuteMany" + +toNumberOfRowsAffected :: MonadIO m => Text -> Int64 -> m (Label "numberOfRowsAffected" Natural) +toNumberOfRowsAffected functionName i64 = + i64 + & intToNatural + & annotate [fmt|{functionName}: postgres returned a negative number of rows affected: {i64}|] + -- we throw this directly in IO here, because we don’t want to e.g. have to propagate MonadThrow through user code (it’s an assertion) + & unwrapIOError + & liftIO + <&> label @"numberOfRowsAffected" + +pgExecuteManyReturning :: (ToRow params, FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m [r] +pgExecuteManyReturning qry params = + do + conn <- Transaction ask + PG.returning conn qry params + & handlePGException "executeManyReturning" qry (Right params) + +pgFold :: + (FromRow row, ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => + Query -> + params -> + a -> + (a -> row -> Transaction m a) -> + Transaction m a +pgFold qry params accumulator f = do + conn <- Transaction ask + + withRunInIO + ( \runInIO -> + do + PG.fold + conn + qry + params + accumulator + (\acc row -> runInIO $ f acc row) + & handlePGException "fold" qry (Left params) + & runInIO + ) + +pgFormatQuery :: (ToRow params, MonadIO m) => Query -> params -> Transaction m ByteString +pgFormatQuery qry params = Transaction $ do + conn <- ask + liftIO $ PG.formatQuery conn qry params + +pgFormatQueryMany :: (MonadIO m, ToRow params) => Query -> [params] -> Transaction m ByteString +pgFormatQueryMany qry params = Transaction $ do + conn <- ask + liftIO $ PG.formatMany conn qry params + +pgQueryWith :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Decoder r -> Transaction m [r] +pgQueryWith qry params (Decoder fromRow) = do + conn <- Transaction ask + PG.queryWith fromRow conn qry params + & handlePGException "query" qry (Left params) + +pgQueryWith_ :: (MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Decoder r -> Transaction m [r] +pgQueryWith_ qry (Decoder fromRow) = do + conn <- Transaction ask + liftIO (PG.queryWith_ fromRow conn qry) + & handlePGException "query" qry (Left ()) + +pgQuery :: (ToRow params, FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Transaction m [r] +pgQuery qry params = do + conn <- Transaction ask + PG.query conn qry params + & handlePGException "query" qry (Left params) + +pgQuery_ :: (FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Transaction m [r] +pgQuery_ qry = do + conn <- Transaction ask + PG.query_ conn qry + & handlePGException "query_" qry (Left ()) + +data SingleRowError = SingleRowError + { -- | How many columns were actually returned by the query + numberOfRowsReturned :: Int + } + deriving stock (Show) + +instance Exception SingleRowError where + displayException (SingleRowError {..}) = [fmt|Single row expected from SQL query result, {numberOfRowsReturned} rows were returned instead."|] + +pgFormatQuery' :: (MonadIO m, ToRow params, MonadLogger m, MonadTools m) => Query -> params -> Transaction m Text +pgFormatQuery' q p = + pgFormatQuery q p + >>= lift . pgFormatQueryByteString + +pgFormatQueryMany' :: (MonadIO m, ToRow params, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m Text +pgFormatQueryMany' q p = + pgFormatQueryMany q p + >>= lift . pgFormatQueryByteString + +-- | Tools required at runtime +data Tools = Tools + { pgFormat :: Tool + } + deriving stock (Show) + +class Monad m => MonadTools m where + getTools :: m Tools + +initMonadTools :: Label "envvar" Text -> IO Tools +initMonadTools var = + Tool.readTools (label @"toolsEnvVar" var.envvar) toolParser + where + toolParser = do + pgFormat <- readTool "pg_format" + pure $ Tools {..} + +pgFormatQueryByteString :: (MonadIO m, MonadLogger m, MonadTools m) => ByteString -> m Text +pgFormatQueryByteString queryBytes = do + do + tools <- getTools + (exitCode, stdout, stderr) <- + Process.readProcessWithExitCode + tools.pgFormat.toolPath + ["-"] + (queryBytes & bytesToTextUtf8Lenient & textToString) + case exitCode of + ExitSuccess -> pure (stdout & stringToText) + ExitFailure status -> do + logWarn [fmt|pg_format failed with status {status} while formatting the query, using original query string. Is there a syntax error?|] + logDebug + ( prettyErrorTree + ( nestedMultiError + "pg_format output" + ( nestedError "stdout" (singleError (stdout & stringToText & newError)) + :| [(nestedError "stderr" (singleError (stderr & stringToText & newError)))] + ) + ) + ) + logDebug [fmt|pg_format stdout: stderr|] + pure (queryBytes & bytesToTextUtf8Lenient) diff --git a/users/Profpatsch/whatcd-resolver/src/Tool.hs b/users/Profpatsch/whatcd-resolver/src/Tool.hs new file mode 100644 index 000000000000..066f68bbe0df --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/Tool.hs @@ -0,0 +1,75 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Tool where + +import Data.Error.Tree +import Label +import PossehlAnalyticsPrelude +import System.Environment qualified as Env +import System.Exit qualified as Exit +import System.FilePath (()) +import System.Posix qualified as Posix +import ValidationParseT + +data Tool = Tool + { -- | absolute path to the executable + toolPath :: FilePath + } + deriving stock (Show) + +-- | Reads all tools from the @toolsEnvVar@ variable or aborts. +readTools :: + Label "toolsEnvVar" Text -> + -- | Parser for Tools we bring with us at build time. + -- + -- These are executables that we need available, and that we have to ship with the distribution of @pa-cli@. + ToolParserT IO tools -> + IO tools +readTools env toolParser = + Env.lookupEnv (env.toolsEnvVar & textToString) >>= \case + Nothing -> do + Exit.die [fmt|Please set {env.toolsEnvVar} to a directory with all tools we need (see `Tools` in the code).|] + Just toolsDir -> + (Posix.fileExist toolsDir & ifTrueOrErr () [fmt|{env.toolsEnvVar} directory does not exist: {toolsDir}|]) + & thenValidate + ( \() -> + (Posix.getFileStatus toolsDir <&> Posix.isDirectory) + & ifTrueOrErr () [fmt|{env.toolsEnvVar} does not point to a directory: {toolsDir}|] + ) + & thenValidate + (\() -> toolParser.unToolParser toolsDir) + <&> first (errorTree [fmt|Could not find all tools in {env.toolsEnvVar}|]) + >>= \case + Failure err -> Exit.die (err & prettyErrorTree & textToString) + Success t -> pure t + +newtype ToolParserT m a = ToolParserT + { unToolParser :: + FilePath -> + m (Validation (NonEmpty Error) a) + } + deriving + (Functor, Applicative) + via (ValidationParseT FilePath m) + +-- | Given a file path and the name of the tool executable, see whether it is an executable and return its full path. +readTool :: Text -> ToolParserT IO Tool +readTool exeName = ToolParserT $ \toolDir -> do + let toolPath :: FilePath = toolDir (exeName & textToString) + let read' = True + let write = False + let exec = True + Posix.fileExist toolPath + & ifTrueOrErr () [fmt|Tool does not exist: {toolPath}|] + & thenValidate + ( \() -> + Posix.fileAccess toolPath read' write exec + & ifTrueOrErr (Tool {..}) [fmt|Tool is not readable/executable: {toolPath}|] + ) + +-- | helper +ifTrueOrErr :: Functor f => a -> Text -> f Bool -> f (Validation (NonEmpty Error) a) +ifTrueOrErr true err io = + io <&> \case + True -> Success true + False -> Failure $ singleton $ newError err diff --git a/users/Profpatsch/whatcd-resolver/src/ValidationParseT.hs b/users/Profpatsch/whatcd-resolver/src/ValidationParseT.hs new file mode 100644 index 000000000000..62322a0ac0bc --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/ValidationParseT.hs @@ -0,0 +1,15 @@ +module ValidationParseT where + +import Data.Functor.Compose (Compose (..)) +import PossehlAnalyticsPrelude + +-- | A simple way to create an Applicative parser that parses from some environment. +-- +-- Use with DerivingVia. Grep codebase for examples. +newtype ValidationParseT env m a = ValidationParseT {unValidationParseT :: env -> m (Validation (NonEmpty Error) a)} + deriving + (Functor, Applicative) + via ( Compose + ((->) env) + (Compose m (Validation (NonEmpty Error))) + ) diff --git a/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs new file mode 100644 index 000000000000..6fe8f8c77f6c --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs @@ -0,0 +1,303 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE QuasiQuotes #-} + +module WhatcdResolver where + +import Control.Monad.Logger qualified as Logger +import Control.Monad.Logger.CallStack +import Control.Monad.Reader +import Data.Aeson qualified as Json +import Data.Aeson.BetterErrors qualified as Json +import Data.Aeson.KeyMap qualified as KeyMap +import Data.Error.Tree +import Data.List.NonEmpty qualified as NonEmpty +import Data.Map.Strict qualified as Map +import Data.Pool (Pool) +import Data.Pool qualified as Pool +import Data.Text qualified as Text +import Database.PostgreSQL.Simple qualified as Postgres +import Database.PostgreSQL.Simple.Types qualified as Postgres +import Database.Postgres.Temp qualified as TmpPg +import FieldParser qualified as Field +import Json qualified +import Json.Enc (Enc) +import Json.Enc qualified as Enc +import Label +import Network.HTTP.Simple qualified as Http +import Network.HTTP.Types +import PossehlAnalyticsPrelude +import Postgres.Decoder qualified as Dec +import Postgres.MonadPostgres +import Pretty +import System.Directory qualified as Dir +import System.Directory qualified as Xdg +import System.FilePath (()) +import System.IO qualified as IO +import UnliftIO + +data TransmissionRequest = TransmissionRequest + { method :: Text, + arguments :: Map Text Enc, + tag :: Maybe Int + } + deriving stock (Show) + +requestListAllTorrents = + TransmissionRequest + { method = "torrent-get", + arguments = + Map.fromList + [ ("fields", Enc.list Enc.text ["id", "name"]) + ], + tag = Nothing + } + +data TransmissionResponse = TransmissionResponse + { result :: TransmissionResponseStatus, + arguments :: Map Text Json.Value, + tag :: Maybe Int + } + deriving stock (Show) + +data TransmissionResponseStatus + = TransmissionResponseSuccess + | TransmissionResponseFailure Text + deriving stock (Show) + +doTransmissionRequest :: + ( MonadIO m, + MonadTransmission m, + HasField "host" t1 Text, + HasField "port" t1 Text, + MonadThrow m + ) => + t1 -> + TransmissionRequest -> + m TransmissionResponse +doTransmissionRequest dat req = do + sessionId <- getTransmissionId + let httpReq = + [fmt|http://{dat.host}:{dat.port}/transmission/rpc|] + & Http.setRequestMethod "POST" + & Http.setRequestBodyLBS + ( Enc.encToBytesUtf8Lazy $ + Enc.object + ( [ ("method", req.method & Enc.text), + ("arguments", Enc.map id req.arguments) + ] + <> (req.tag & maybe [] (\t -> [("tag", t & Enc.int)])) + ) + ) + & (sessionId & maybe id (Http.setRequestHeader "X-Transmission-Session-Id" . (: []))) + resp <- Http.httpBS httpReq + -- Implement the CSRF protection thingy + case resp & Http.getResponseStatus & (.statusCode) of + 409 -> do + tid <- + resp + & Http.getResponseHeader "X-Transmission-Session-Id" + & nonEmpty + & annotate [fmt|Missing "X-Transmission-Session-Id" header in 409 response: {showPretty resp}|] + & unwrapIOError + & liftIO + <&> NonEmpty.head + setTransmissionId tid + doTransmissionRequest dat req + 200 -> + resp + & Http.getResponseBody + & Json.parseStrict + ( Json.mapError singleError $ do + result <- + Json.key "result" Json.asText <&> \case + "success" -> TransmissionResponseSuccess + err -> TransmissionResponseFailure err + arguments <- + Json.keyMay "arguments" Json.asObject + <&> fromMaybe mempty + <&> KeyMap.toMapText + tag <- + Json.keyMay + "tag" + (Field.jsonParser (Field.jsonNumber >>> Field.boundedScientificIntegral "tag too long")) + pure TransmissionResponse {..} + ) + & first (Json.parseErrorTree "Cannot parse transmission RPC response") + & \case + Right a -> pure a + Left err -> appThrowTree err + _ -> liftIO $ unwrapIOError $ Left [fmt|Non-200 response: {showPretty resp}|] + +runAppWith :: AppT IO a -> IO (Either TmpPg.StartError a) +runAppWith appT = withDb $ \db -> do + tools <- initMonadTools (label @"envvar" "WHATCD_RESOLVER_TOOLS") + let config = label @"logDatabaseQueries" LogDatabaseQueries + pgConnPool <- + Pool.createPool + (Postgres.connectPostgreSQL (db & TmpPg.toConnectionString)) + Postgres.close + {- number of stripes -} 5 + {- unusedResourceOpenTime -} 10 + {- max resources per stripe -} 10 + transmissionSessionId <- newEmptyMVar + runReaderT appT.unAppT Context {..} + +withDb :: (TmpPg.DB -> IO a) -> IO (Either TmpPg.StartError a) +withDb act = do + dataDir <- Xdg.getXdgDirectory Xdg.XdgData "whatcd-resolver" + let databaseDir = dataDir "database" + initDbConfig <- + Dir.doesDirectoryExist databaseDir >>= \case + True -> pure TmpPg.Zlich + False -> do + putStderrLn [fmt|Database does not exist yet, creating in "{databaseDir}"|] + Dir.createDirectoryIfMissing True databaseDir + pure TmpPg.DontCare + let cfg = + mempty + { TmpPg.dataDirectory = TmpPg.Permanent (databaseDir), + TmpPg.initDbConfig + } + TmpPg.withConfig cfg $ \db -> do + -- print [fmt|data dir: {db & TmpPg.toDataDirectory}|] + -- print [fmt|conn string: {db & TmpPg.toConnectionString}|] + act db + +data Context = Context + { config :: Label "logDatabaseQueries" DatabaseLogging, + tools :: Tools, + pgConnPool :: Pool Postgres.Connection, + transmissionSessionId :: MVar ByteString + } + +newtype AppT m a = AppT {unAppT :: ReaderT Context m a} + deriving newtype (Functor, Applicative, Monad, MonadIO, MonadUnliftIO, MonadThrow) + +data AppException = AppException Text + deriving stock (Show) + deriving anyclass (Exception) + +appThrowTree :: MonadThrow m => ErrorTree -> m a +appThrowTree exc = throwM $ AppException $ prettyErrorTree exc + +instance MonadIO m => MonadLogger (AppT m) where + monadLoggerLog loc src lvl msg = liftIO $ Logger.defaultOutput IO.stderr loc src lvl (Logger.toLogStr msg) + +instance Monad m => MonadTools (AppT m) where + getTools = AppT $ asks (.tools) + +class MonadTransmission m where + getTransmissionId :: m (Maybe ByteString) + setTransmissionId :: ByteString -> m () + +instance (MonadIO m) => MonadTransmission (AppT m) where + getTransmissionId = AppT (asks (.transmissionSessionId)) >>= tryTakeMVar + setTransmissionId t = do + var <- AppT $ asks (.transmissionSessionId) + putMVar var t + +instance (MonadThrow m, MonadUnliftIO m) => MonadPostgres (AppT m) where + execute qry params = do + conf <- lift $ AppT (asks (.config)) + logQueryIfEnabled conf qry (Left params) + pgExecute qry params + executeMany qry params = do + conf <- lift $ AppT (asks (.config)) + logQueryIfEnabled conf qry (Right params) + pgExecuteMany qry params + executeManyReturning qry params = do + conf <- lift $ AppT (asks (.config)) + logQueryIfEnabled conf qry (Right params) + pgExecuteManyReturning qry params + + queryWith qry params decoder = do + conf <- lift $ AppT (asks (.config)) + logQueryIfEnabled conf qry (Left params) + pgQueryWith qry params decoder + + -- TODO: log these queries as well with `logQueryIfEnabled`, but test out whether it works with query_ and foldRows first. + queryWith_ = pgQueryWith_ + foldRows = pgFold + + runTransaction = runPGTransaction + +runPGTransaction :: MonadUnliftIO m => Transaction (AppT m) a -> AppT m a +runPGTransaction (Transaction transaction) = do + pool <- AppT ask <&> (.pgConnPool) + withRunInIO $ \unliftIO -> + withPGTransaction pool $ \conn -> do + unliftIO $ runReaderT transaction conn + +-- | Perform a Postgres action within a transaction +withPGTransaction :: + -- | Postgres connection pool to be used for the action + Pool Postgres.Connection -> + -- | DB-action to be performed + (Postgres.Connection -> IO a) -> + -- | Result of the DB-action + IO a +withPGTransaction connPool f = + Pool.withResource + connPool + (\conn -> Postgres.withTransaction conn (f conn)) + +-- | Log the postgres query depending on the setting of @config.debugInfo.logDatabaseQueries@. +logQueryIfEnabled :: + ( Postgres.ToRow params, + MonadUnliftIO m, + MonadLogger m, + MonadTools m, + HasField "logDatabaseQueries" config DatabaseLogging + ) => + config -> + Postgres.Query -> + Either params [params] -> + Transaction m () +logQueryIfEnabled config qry params = do + -- In case we have query logging enabled, we want to do that + let formattedQuery = case params of + Left p -> pgFormatQuery' qry p + Right ps -> pgFormatQueryMany' qry ps + + let doLog errs = + errs + & nestedMultiError "Postgres query" + & prettyErrorTree + & logDebug + & lift + let addQuery = do + formattedQuery + <&> newError + <&> singleError + let addExplain = do + q <- formattedQuery + pgQueryWith_ + ( "EXPLAIN " + <> ( + -- TODO: this is not nice, but the only way to get the `executeMany` form to work with this + -- because we need the query with all elements already interpolated. + Postgres.Query (q & textToBytesUtf8) + ) + ) + (Dec.fromField @Text) + <&> Text.intercalate "\n" + <&> newError + <&> singleError + + case config.logDatabaseQueries of + DontLogDatabaseQueries -> pure () + LogDatabaseQueries -> do + aq <- addQuery + doLog (aq :| []) + LogDatabaseQueriesAndExplain -> do + aq <- addQuery + -- XXX: stuff like `CREATE SCHEMA` cannot be EXPLAINed, so we should catch exceptions here + -- and just ignore anything that errors (if it errors because of a problem with the query, it would have been caught by the query itself. + ex <- addExplain + doLog (nestedError "Query" aq :| [nestedError "Explain" ex]) + +data DatabaseLogging + = DontLogDatabaseQueries + | LogDatabaseQueries + | LogDatabaseQueriesAndExplain diff --git a/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal b/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal new file mode 100644 index 000000000000..6d8fd5bd52f8 --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal @@ -0,0 +1,90 @@ +cabal-version: 3.0 +name: whatcd-resolver +version: 0.1.0.0 +author: Profpatsch +maintainer: mail@profpatsch.de + +common common-options + ghc-options: + -Wall + -Wno-type-defaults + -Wunused-packages + -Wredundant-constraints + -fwarn-missing-deriving-strategies + + -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html + -- for a description of all these extensions + default-extensions: + -- Infer Applicative instead of Monad where possible + ApplicativeDo + + -- Allow literal strings to be Text + OverloadedStrings + + -- Syntactic sugar improvements + LambdaCase + MultiWayIf + + -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error + NoStarIsType + + -- Convenient and crucial to deal with ambiguous field names, commonly + -- known as RecordDotSyntax + OverloadedRecordDot + + -- does not export record fields as functions, use OverloadedRecordDot to access instead + NoFieldSelectors + + -- Record punning + RecordWildCards + + -- Improved Deriving + DerivingStrategies + DerivingVia + + -- Type-level strings + DataKinds + + -- to enable the `type` keyword in import lists (ormolu uses this automatically) + ExplicitNamespaces + + default-language: GHC2021 + + +library + import: common-options + + hs-source-dirs: src + + exposed-modules: + WhatcdResolver + Postgres.Decoder + Postgres.MonadPostgres + Tool + ValidationParseT + + build-depends: + base >=4.15 && <5, + text, + pa-prelude, + pa-error-tree, + pa-label, + pa-json, + pa-field-parser, + containers, + pa-pretty, + tmp-postgres, + directory, + filepath, + aeson, + aeson-better-errors, + postgresql-simple, + resource-pool, + http-conduit, + http-types, + mtl, + transformers, + unliftio, + monad-logger, + unix, + -- cgit 1.4.1