diff options
author | Profpatsch <mail@profpatsch.de> | 2023-06-19T13·56+0200 |
---|---|---|
committer | Profpatsch <mail@profpatsch.de> | 2023-07-14T08·03+0000 |
commit | 07b976ccd89cc8a4b5d0624ee2223d64add4615d (patch) | |
tree | 559db5d16d23cbb7f576448fccccb9a5f84e85e7 /users/Profpatsch/whatcd-resolver/src/Postgres | |
parent | 5daa31db3ba0a7dbe2a5f8ef01b024deb61fcc3e (diff) |
feat(users/Profpatsch): init whatcd-resolver r/6413
Change-Id: Ieb377fb8caa60e716703153dfeca5173f9a6779d Reviewed-on: https://cl.tvl.fyi/c/depot/+/8830 Reviewed-by: Profpatsch <mail@profpatsch.de> Tested-by: BuildkiteCI
Diffstat (limited to 'users/Profpatsch/whatcd-resolver/src/Postgres')
-rw-r--r-- | users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs | 58 | ||||
-rw-r--r-- | users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs | 377 |
2 files changed, 435 insertions, 0 deletions
diff --git a/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs b/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs new file mode 100644 index 000000000000..2e7fcb8779ed --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/Postgres/Decoder.hs @@ -0,0 +1,58 @@ +module Postgres.Decoder where + +import Control.Applicative (Alternative) +import Data.Aeson qualified as Json +import Data.Aeson.BetterErrors qualified as Json +import Data.Error.Tree +import Data.Typeable (Typeable) +import Database.PostgreSQL.Simple.FromField qualified as PG +import Database.PostgreSQL.Simple.FromRow qualified as PG +import Json qualified +import Label +import PossehlAnalyticsPrelude + +-- | A Decoder of postgres values. Allows embedding more complex parsers (like a 'Json.ParseT'). +newtype Decoder a = Decoder (PG.RowParser a) + deriving newtype (Functor, Applicative, Alternative, Monad) + +-- | Turn any type that implements 'PG.fromField' into a 'Decoder'. Use type applications to prevent accidental conversions: +-- +-- @ +-- fromField @Text :: Decoder Text +-- @ +fromField :: PG.FromField a => Decoder a +fromField = Decoder $ PG.fieldWith PG.fromField + +-- | Turn any type that implements 'PG.fromField' into a 'Decoder' and wrap the result into the given 'Label'. Use type applications to prevent accidental conversions: +-- +-- @ +-- fromField @"myField" @Text :: Decoder (Label "myField" Text) +-- @ +fromFieldLabel :: forall lbl a. PG.FromField a => Decoder (Label lbl a) +fromFieldLabel = label @lbl <$> fromField + +-- | Parse fields out of a json value returned from the database. +-- +-- ATTN: The whole json record has to be transferred before it is parsed, +-- so if you only need a tiny bit of it, use `->` and `->>` in your SQL statement +-- and return only the fields you need from the query. +-- +-- In that case pay attention to NULL though: +-- +-- @ +-- SELECT '{"foo": {}}'::jsonb->>'foo' IS NULL +-- → TRUE +-- @ +-- +-- Also note: `->>` will coerce the json value to @text@, regardless of the content. +-- So the JSON object @{"foo": {}}"@ would be returned as the text: @"{\"foo\": {}}"@. +json :: Typeable a => Json.ParseT ErrorTree Identity a -> Decoder a +json parser = Decoder $ PG.fieldWith $ \field bytes -> do + val <- PG.fromField @Json.Value field bytes + case Json.parseValue parser val of + Left err -> + PG.returnError + PG.ConversionFailed + field + (err & Json.parseErrorTree "Cannot decode jsonb column" & prettyErrorTree & textToString) + Right a -> pure a diff --git a/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs b/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs new file mode 100644 index 000000000000..9911f260a2e9 --- /dev/null +++ b/users/Profpatsch/whatcd-resolver/src/Postgres/MonadPostgres.hs @@ -0,0 +1,377 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TypeFamilyDependencies #-} +{-# LANGUAGE UndecidableInstances #-} + +module Postgres.MonadPostgres where + +import Control.Exception +import Control.Monad.Except +import Control.Monad.Logger.CallStack +import Control.Monad.Reader (MonadReader (ask), ReaderT (..)) +import Data.Error.Tree +import Data.Int (Int64) +import Data.Kind (Type) +import Data.List qualified as List +import Data.Typeable (Typeable) +import Database.PostgreSQL.Simple (Connection, FormatError, FromRow, Query, QueryError, ResultError, SqlError, ToRow) +import Database.PostgreSQL.Simple qualified as PG +import Database.PostgreSQL.Simple.FromRow qualified as PG +import Label +import PossehlAnalyticsPrelude +import Postgres.Decoder +import Pretty (showPretty) +import System.Exit (ExitCode (..)) +import Tool +import UnliftIO (MonadUnliftIO (withRunInIO)) +import UnliftIO.Process qualified as Process + +-- | Postgres queries/commands that can be executed within a running transaction. +-- +-- These are implemented with the @postgresql-simple@ primitives of the same name +-- and will behave the same unless othewise documented. +class Monad m => MonadPostgres (m :: Type -> Type) where + -- | Execute an INSERT, UPDATE, or other SQL query that is not expected to return results. + + -- Returns the number of rows affected. + execute :: (ToRow params, Typeable params) => Query -> params -> Transaction m (Label "numberOfRowsAffected" Natural) + + -- | Execute a multi-row INSERT, UPDATE, or other SQL query that is not expected to return results. + -- + -- Returns the number of rows affected. If the list of parameters is empty, this function will simply return 0 without issuing the query to the backend. If this is not desired, consider using the 'PG.Values' constructor instead. + executeMany :: (ToRow params, Typeable params) => Query -> [params] -> Transaction m (Label "numberOfRowsAffected" Natural) + + -- | Execute INSERT ... RETURNING, UPDATE ... RETURNING, or other SQL query that accepts multi-row input and is expected to return results. Note that it is possible to write query conn "INSERT ... RETURNING ..." ... in cases where you are only inserting a single row, and do not need functionality analogous to 'executeMany'. + -- + -- If the list of parameters is empty, this function will simply return [] without issuing the query to the backend. If this is not desired, consider using the 'PG.Values' constructor instead. + executeManyReturning :: (ToRow q, FromRow r) => Query -> [q] -> Transaction m [r] + + -- | Run a query, passing parameters and result row parser. + queryWith :: (PG.ToRow params, Typeable params, Typeable r) => PG.Query -> params -> Decoder r -> Transaction m [r] + + -- | Run a query without any parameters and result row parser. + queryWith_ :: (Typeable r) => PG.Query -> Decoder r -> Transaction m [r] + + -- | Run a query, passing parameters, and fold over the resulting rows. + -- + -- This doesn’t have to realize the full list of results in memory, + -- rather results are streamed incrementally from the database. + -- + -- When dealing with small results, it may be simpler (and perhaps faster) to use query instead. + -- + -- This fold is _not_ strict. The stream consumer is responsible for forcing the evaluation of its result to avoid space leaks. + -- + -- If you can, prefer aggregating in the database itself. + foldRows :: + (FromRow row, ToRow params, Typeable row, Typeable params) => + Query -> + params -> + a -> + (a -> row -> Transaction m a) -> + Transaction m a + + -- | Run a given transaction in a transaction block, rolling back the transaction + -- if any exception (postgres or Haskell Exception) is thrown during execution. + -- + -- Re-throws the exception. + -- + -- Don’t do any long-running things on the Haskell side during a transaction, + -- because it will block a database connection and potentially also lock + -- database tables from being written or read by other clients. + -- + -- Nonetheless, try to push transactions as far out to the handlers as possible, + -- don’t do something like @runTransaction $ query …@, because it will lead people + -- to accidentally start nested transactions (the inner transaction is run on a new connections, + -- thus can’t see any changes done by the outer transaction). + -- Only handlers should run transactions. + runTransaction :: Transaction m a -> m a + +-- | Run a query, passing parameters. +query :: forall m params r. (PG.ToRow params, PG.FromRow r, Typeable params, Typeable r, MonadPostgres m) => PG.Query -> params -> Transaction m [r] +query qry params = queryWith qry params (Decoder PG.fromRow) + +-- | Run a query without any parameters. +query_ :: forall m r. (Typeable r, PG.FromRow r, MonadPostgres m) => PG.Query -> Transaction m [r] +query_ qry = queryWith_ qry (Decoder PG.fromRow) + +-- TODO: implement via fold, so that the result doesn’t have to be realized in memory +querySingleRow :: + ( MonadPostgres m, + ToRow qParams, + Typeable qParams, + FromRow a, + Typeable a, + MonadThrow m + ) => + Query -> + qParams -> + Transaction m a +querySingleRow qry params = do + query qry params >>= ensureSingleRow + +-- TODO: implement via fold, so that the result doesn’t have to be realized in memory +querySingleRowMaybe :: + ( MonadPostgres m, + ToRow qParams, + Typeable qParams, + FromRow a, + Typeable a, + MonadThrow m + ) => + Query -> + qParams -> + Transaction m (Maybe a) +querySingleRowMaybe qry params = do + rows <- query qry params + case rows of + [] -> pure Nothing + [one] -> pure (Just one) + -- TODO: Should we MonadThrow this here? It’s really an implementation detail of MonadPostgres + -- that a database function can error out, should probably handled by the instances. + more -> throwM $ SingleRowError {numberOfRowsReturned = (List.length more)} + +ensureSingleRow :: MonadThrow m => [a] -> m a +ensureSingleRow = \case + -- TODO: Should we MonadThrow this here? It’s really an implementation detail of MonadPostgres + -- that a database function can error out, should probably handled by the instances. + [] -> throwM (SingleRowError {numberOfRowsReturned = 0}) + [one] -> pure one + more -> + throwM $ + SingleRowError + { numberOfRowsReturned = + -- TODO: this is VERY bad, because it requires to parse the full database output, even if there’s 10000000000 elements + List.length more + } + +-- | A better `query` +-- +-- Parameters are passed first, +-- then a Proxy which you should annotate with the return type of the query. +-- This way it’s right before the @SELECT@, +-- meaning it’s easy to see whether the two correspond. +-- +-- TODO: maybe replace the query function in the class with this? +queryBetter :: + ( MonadPostgres m, + ToRow params, + FromRow res, + Typeable params, + Typeable res + ) => + params -> + Proxy res -> + Query -> + Transaction m [res] +queryBetter params Proxy q = query q params + +newtype Transaction m a = Transaction {unTransaction :: (ReaderT Connection m a)} + deriving newtype + ( Functor, + Applicative, + Monad, + MonadThrow, + MonadLogger, + MonadIO, + MonadUnliftIO, + MonadTrans + ) + +runTransaction' :: Connection -> Transaction m a -> m a +runTransaction' conn transaction = runReaderT transaction.unTransaction conn + +-- | Catch any Postgres exception that gets thrown, +-- print the query that was run and the query parameters, +-- then rethrow inside an 'Error'. +handlePGException :: + forall a params m. + (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => + Text -> + Query -> + -- | Depending on whether we used `format` or `formatMany`. + Either params [params] -> + IO a -> + Transaction m a +handlePGException queryType query' params io = do + withRunInIO $ \unliftIO -> + io + `catches` [ Handler $ unliftIO . logQueryException @SqlError, + Handler $ unliftIO . logQueryException @QueryError, + Handler $ unliftIO . logQueryException @ResultError, + Handler $ unliftIO . logFormatException + ] + where + -- TODO: use throwInternalError here (after pulling it into the MonadPostgres class) + throwAsError = unwrapIOError . Left . newError + throwErr err = liftIO $ throwAsError $ prettyErrorTree $ nestedMultiError "A Postgres query failed" err + logQueryException :: Exception e => e -> Transaction m a + logQueryException exc = do + formattedQuery <- case params of + Left one -> pgFormatQuery' query' one + Right many -> pgFormatQueryMany' query' many + throwErr + ( singleError [fmt|Query Type: {queryType}|] + :| [ nestedError "Exception" (exc & showPretty & newError & singleError), + nestedError "Query" (formattedQuery & newError & singleError) + ] + ) + logFormatException :: FormatError -> Transaction m a + logFormatException fe = throwErr (fe & showPretty & newError & singleError & singleton) + +pgExecute :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecute qry params = do + conn <- Transaction ask + PG.execute conn qry params + & handlePGException "execute" qry (Left params) + >>= toNumberOfRowsAffected "pgExecute" + +pgExecute_ :: (MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecute_ qry = do + conn <- Transaction ask + PG.execute_ conn qry + & handlePGException "execute_" qry (Left ()) + >>= toNumberOfRowsAffected "pgExecute_" + +pgExecuteMany :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m (Label "numberOfRowsAffected" Natural) +pgExecuteMany qry params = + do + conn <- Transaction ask + PG.executeMany conn qry params + & handlePGException "executeMany" qry (Right params) + >>= toNumberOfRowsAffected "pgExecuteMany" + +toNumberOfRowsAffected :: MonadIO m => Text -> Int64 -> m (Label "numberOfRowsAffected" Natural) +toNumberOfRowsAffected functionName i64 = + i64 + & intToNatural + & annotate [fmt|{functionName}: postgres returned a negative number of rows affected: {i64}|] + -- we throw this directly in IO here, because we don’t want to e.g. have to propagate MonadThrow through user code (it’s an assertion) + & unwrapIOError + & liftIO + <&> label @"numberOfRowsAffected" + +pgExecuteManyReturning :: (ToRow params, FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m [r] +pgExecuteManyReturning qry params = + do + conn <- Transaction ask + PG.returning conn qry params + & handlePGException "executeManyReturning" qry (Right params) + +pgFold :: + (FromRow row, ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => + Query -> + params -> + a -> + (a -> row -> Transaction m a) -> + Transaction m a +pgFold qry params accumulator f = do + conn <- Transaction ask + + withRunInIO + ( \runInIO -> + do + PG.fold + conn + qry + params + accumulator + (\acc row -> runInIO $ f acc row) + & handlePGException "fold" qry (Left params) + & runInIO + ) + +pgFormatQuery :: (ToRow params, MonadIO m) => Query -> params -> Transaction m ByteString +pgFormatQuery qry params = Transaction $ do + conn <- ask + liftIO $ PG.formatQuery conn qry params + +pgFormatQueryMany :: (MonadIO m, ToRow params) => Query -> [params] -> Transaction m ByteString +pgFormatQueryMany qry params = Transaction $ do + conn <- ask + liftIO $ PG.formatMany conn qry params + +pgQueryWith :: (ToRow params, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Decoder r -> Transaction m [r] +pgQueryWith qry params (Decoder fromRow) = do + conn <- Transaction ask + PG.queryWith fromRow conn qry params + & handlePGException "query" qry (Left params) + +pgQueryWith_ :: (MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Decoder r -> Transaction m [r] +pgQueryWith_ qry (Decoder fromRow) = do + conn <- Transaction ask + liftIO (PG.queryWith_ fromRow conn qry) + & handlePGException "query" qry (Left ()) + +pgQuery :: (ToRow params, FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> params -> Transaction m [r] +pgQuery qry params = do + conn <- Transaction ask + PG.query conn qry params + & handlePGException "query" qry (Left params) + +pgQuery_ :: (FromRow r, MonadUnliftIO m, MonadLogger m, MonadTools m) => Query -> Transaction m [r] +pgQuery_ qry = do + conn <- Transaction ask + PG.query_ conn qry + & handlePGException "query_" qry (Left ()) + +data SingleRowError = SingleRowError + { -- | How many columns were actually returned by the query + numberOfRowsReturned :: Int + } + deriving stock (Show) + +instance Exception SingleRowError where + displayException (SingleRowError {..}) = [fmt|Single row expected from SQL query result, {numberOfRowsReturned} rows were returned instead."|] + +pgFormatQuery' :: (MonadIO m, ToRow params, MonadLogger m, MonadTools m) => Query -> params -> Transaction m Text +pgFormatQuery' q p = + pgFormatQuery q p + >>= lift . pgFormatQueryByteString + +pgFormatQueryMany' :: (MonadIO m, ToRow params, MonadLogger m, MonadTools m) => Query -> [params] -> Transaction m Text +pgFormatQueryMany' q p = + pgFormatQueryMany q p + >>= lift . pgFormatQueryByteString + +-- | Tools required at runtime +data Tools = Tools + { pgFormat :: Tool + } + deriving stock (Show) + +class Monad m => MonadTools m where + getTools :: m Tools + +initMonadTools :: Label "envvar" Text -> IO Tools +initMonadTools var = + Tool.readTools (label @"toolsEnvVar" var.envvar) toolParser + where + toolParser = do + pgFormat <- readTool "pg_format" + pure $ Tools {..} + +pgFormatQueryByteString :: (MonadIO m, MonadLogger m, MonadTools m) => ByteString -> m Text +pgFormatQueryByteString queryBytes = do + do + tools <- getTools + (exitCode, stdout, stderr) <- + Process.readProcessWithExitCode + tools.pgFormat.toolPath + ["-"] + (queryBytes & bytesToTextUtf8Lenient & textToString) + case exitCode of + ExitSuccess -> pure (stdout & stringToText) + ExitFailure status -> do + logWarn [fmt|pg_format failed with status {status} while formatting the query, using original query string. Is there a syntax error?|] + logDebug + ( prettyErrorTree + ( nestedMultiError + "pg_format output" + ( nestedError "stdout" (singleError (stdout & stringToText & newError)) + :| [(nestedError "stderr" (singleError (stderr & stringToText & newError)))] + ) + ) + ) + logDebug [fmt|pg_format stdout: stderr|] + pure (queryBytes & bytesToTextUtf8Lenient) |