~fgaz/orchid

3aea6d0b40b1660ea4034682b511c3f3bf4c6a95 — Francesco Gazzetta 1 year, 25 days ago d4e4ecc
Don't use the filesystem as a poor man's database

...instead use JSON as a poor man's database :D
A cabal.project => cabal.project +1 -0
@@ 0,0 1,1 @@
packages: orchid.cabal bimap-many

M default.nix => default.nix +3 -1
@@ 5,5 5,7 @@
let
  src = pkgs.nix-gitignore.gitignoreSource [".git"] ./.;
  pkg = pkgs.haskell.lib.generateOptparseApplicativeCompletions ["orchid"]
        (pkgs.haskellPackages.callCabal2nix "orchid" src {});
        (pkgs.haskellPackages.callCabal2nix "orchid" src {
          bimap-many = pkgs.haskellPackages.callCabal2nix "bimap-many" ./bimap-many {};
        });
in if onlyExe then pkgs.haskell.lib.justStaticExecutables pkg else pkg

M orchid.cabal => orchid.cabal +13 -6
@@ 26,8 26,7 @@ common common
                         || ^>=4.14
                         || ^>=4.15
                         || ^>=4.16
                     , optparse-applicative ^>= 0.16.0.0
                                         || ^>= 0.17.0.0
                         || ^>=4.17

library
  import:              common


@@ 36,10 35,12 @@ library
                     , BaseContext
                     , Command
                     , Command.Pin
                     , Command.Pin.Db
                     , Command.Pin.Options
  other-modules:       Util
                     , Util.Nix
  -- other-extensions:
  other-extensions:    GeneralizedNewtypeDeriving
                       NamedFieldPuns
  build-depends:       containers ^>=0.6.0.1
                     , bytestring ^>= 0.10.10
                               || ^>= 0.11


@@ 47,13 48,19 @@ library
                     , filepath ^>= 1.4.2
                     , directory ^>= 1.3.6
                     , process ^>= 1.6.9
                     , typed-process ^>=0.2.10.1
                                  || ^>= 0.2.11.0
                     , time ^>= 1.9.3
                         || ^>= 1.10
                         || ^>= 1.11
                         || ^>= 1.12.1
                     , cryptohash-sha256 ^>= 0.11.101
                     , unix ^>= 2.7.2
                     , mtl ^>= 2.3
                     , mtl ^>= 2.2
                        || ^>= 2.3
                     , bimap-many
                     , optparse-applicative ^>= 0.16.0.0
                                         || ^>= 0.17.0.0
                     , aeson ^>= 2.0
                          || ^>= 2.1
  hs-source-dirs:      src

executable orchid

M src/BaseContext.hs => src/BaseContext.hs +9 -4
@@ 4,8 4,10 @@ import Options.GlobalOptions (GlobalOptions)
import qualified Options.GlobalOptions as GlobalOptions

import Data.Maybe (fromMaybe)
import System.Exit (die)

import System.Process (readProcess)
import System.Process.Typed (proc, readProcessStdout_)
import Data.Aeson (eitherDecode)


-- | The context we need to execute any command.


@@ 16,9 18,12 @@ data BaseContext = BaseContext

establishBaseContext :: GlobalOptions -> IO BaseContext
establishBaseContext globalOpts = do
  defaultStoreDir <- readProcess "nix"
                       ["eval", "--raw", "--expr", "builtins.storeDir"]
                       mempty
  -- TODO move to nix module, maybe use typed-process for everything
  defaultStoreDirJSON <- readProcessStdout_ $ proc "nix-instantiate"
                           ["--eval", "--json", "--expr", "(builtins.storeDir)"]
  defaultStoreDir <- case eitherDecode defaultStoreDirJSON of
    Left e -> die $ "Cannot decode result of builtins.storeDir Nix call: " <> e
    Right s -> pure s
  pure BaseContext
    { storeDir = fromMaybe defaultStoreDir $ GlobalOptions.storeDir globalOpts
    }

M src/Command/Pin.hs => src/Command/Pin.hs +89 -94
@@ 4,6 4,7 @@ module Command.Pin where

import Command (Command(Pin))
import Command.Pin.Options (PinOptions, pinOptions)
import Command.Pin.Db
import qualified Command.Pin.Options as Opts
import BaseContext



@@ 12,56 13,55 @@ import Util.Nix

import Options.Applicative

import Data.Maybe (mapMaybe, maybeToList)
import Data.Foldable (traverse_)
import Control.Monad (when, unless, forM_, void)
import Data.Maybe (maybeToList, fromMaybe)
import Control.Monad (when, forM_)

import qualified Data.ByteString.Lazy as LBS
import Data.String (fromString)

import Data.Containers.ListUtils (nubOrd)
import qualified Data.BimapMany as BMM
import qualified Data.Set as Set

import System.FilePath ((</>))
-- TODO use bytestring functions from unix package
import System.Directory
  ( listDirectory, doesPathExist, doesFileExist
  , createDirectoryIfMissing, createFileLink
  , getModificationTime, setModificationTime
  , removeDirectoryRecursive
  ( doesFileExist
  , createDirectoryIfMissing
  , getModificationTime
  , getXdgDirectory
  , XdgDirectory(XdgState)
  , removeFile
  , canonicalizePath
  , pathIsSymbolicLink
  , getSymbolicLinkTarget )

import System.Process (readProcess)

import System.Posix.User (getEffectiveUserName)

import Crypto.Hash.SHA256 (hash)

import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)

import Control.Monad.Reader (MonadReader, ReaderT, runReaderT, ask, asks)
import Control.Monad.Reader (MonadReader, ReaderT, runReaderT, ask)
import Control.Monad.State (MonadState, StateT, runStateT, get, modify')
import Control.Monad.IO.Class (MonadIO, liftIO)


data PinContext = PinContext
  { targets :: [FilePath]
  , fullScan :: Bool
  , gcrootsRoot :: FilePath
  , gcrootsDir :: FilePath
  , lastExecutionFile :: FilePath
  , lastExecution :: UTCTime
  , dbPath :: FilePath
  , initialDb :: PinDb
  , startTime :: UTCTime
  , storeContents :: StoreContents }

type PinState = RootsBimap

-- TODO state = type PinState = BimapMany FilePath Hash ()
newtype PinM a = PinM { runPin' :: ReaderT (BaseContext, PinContext) IO a }
  deriving (Functor, Applicative, Monad, MonadIO, MonadReader (BaseContext, PinContext))
-- MAYBE when we have a Monoid trie, change State to Accum
newtype PinM a = PinM { runPin' :: ReaderT (BaseContext, PinContext) (StateT PinState IO) a }
  deriving
    ( Functor, Applicative, Monad, MonadIO
    , MonadReader (BaseContext, PinContext)
    , MonadState PinState )

runPin :: BaseContext -> PinContext -> PinM a -> IO a
runPin baseCtx pinCtx = flip runReaderT (baseCtx, pinCtx) . runPin'
runPin :: BaseContext -> PinContext -> PinState -> PinM a -> IO (a, PinState)
runPin baseCtx pinCtx pinState = flip runStateT pinState . flip runReaderT (baseCtx, pinCtx) . runPin'

pinCommand :: Mod CommandFields Command
pinCommand = command "pin" (info (Pin <$> pinOptions)


@@ 69,97 69,92 @@ pinCommand = command "pin" (info (Pin <$> pinOptions)

pinAction :: BaseContext -> PinOptions -> IO ()
pinAction baseCtx pinOpts = do
  liftIO $ putStrLn "Reading state and nix store…"
  pinCtx <- establishPinContext baseCtx pinOpts
  runPin baseCtx pinCtx $ do
  ((), finalDb) <- runPin baseCtx pinCtx (rootsBimap $ initialDb pinCtx) $ do
    liftIO $ putStrLn "Cleaning old roots…"
    cleanLinks
    liftIO $ putStrLn "Scanning for new references…"
    traverseFileTrees processFile $ targets pinCtx
    updateLastExecution

updateLastExecution :: PinM ()
updateLastExecution = do
  time <- asks (startTime . snd)
  file <- asks (lastExecutionFile . snd)
  liftIO $ setModificationTime file time
    newDb <- get
    let rootsToCreate = Set.difference (BMM.keysSetR newDb) (BMM.keysSetR (rootsBimap $ initialDb pinCtx))
    liftIO $ putStrLn "Creating new roots…"
    forM_ rootsToCreate linkHash
  putStrLn "Writing out new state…"
  -- TODO atomic write (mktemp+mv)
  writeDb (dbPath pinCtx) (PinDb (startTime pinCtx) finalDb)
  putStrLn "All done!"

establishPinContext :: BaseContext -> PinOptions -> IO PinContext
establishPinContext baseCtx pinOpts = do
  targets <- traverse canonicalizePath $ Opts.targets pinOpts
  gcrootsRoot <- maybe getDefaultGcrootsPath pure $ Opts.gcrootsPath pinOpts
  let gcrootsDir = gcrootsRoot </> "files"
      lastExecutionFile = gcrootsRoot </> "last-execution"
  stateDir <- getXdgDirectory XdgState "orchid"
  let gcrootsDir = fromMaybe (stateDir </> "roots") $ Opts.gcrootsPath pinOpts
      dbPath = stateDir </> "pin-db.json"
  initialDb <- readDb dbPath
  createDirectoryIfMissing True gcrootsDir
  lastExecutionExists <- doesPathExist lastExecutionFile
  unless lastExecutionExists $ do
    writeFile lastExecutionFile ""
    setModificationTime lastExecutionFile $ posixSecondsToUTCTime 0
  lastExecution <- getModificationTime lastExecutionFile
  startTime <- getCurrentTime
  storeContents <- getStoreContents $ storeDir baseCtx
  pure PinContext
    { targets
    , fullScan = Opts.fullScan pinOpts
    , gcrootsRoot
    , gcrootsDir
    , lastExecutionFile
    , lastExecution
    , dbPath
    , initialDb
    , startTime
    , storeContents }

getDefaultGcrootsPath :: IO FilePath
getDefaultGcrootsPath = do
  userName <- getEffectiveUserName
  pure $ "/nix/var/nix/gcroots/per-user" </> userName </> "orchid"

processFile :: FilePath -> PinM ()
processFile source = do
  (baseCtx, pinCtx) <- ask
  liftIO $ do
    let hashed = bsHex $ hash $ fromString source
        target = gcrootsDir pinCtx </> hashed
    fileMod <- getModificationTime source
    when (fileMod > lastExecution pinCtx) $ do
      isLink <- pathIsSymbolicLink source
      storePaths <- if isLink
        then do
          linkTarget <- getSymbolicLinkTarget source
          let maybeHash = hashFromPath (storeDir baseCtx) linkTarget
              maybeStorePath = maybeHash >>= lookupHash (storeContents pinCtx)
          pure $ maybeToList maybeStorePath
        else do
          -- TODO check that it's a normal file (could be a socket or other special file)
          contents <- LBS.readFile source
          pure $ mapMaybe (lookupHash $ storeContents pinCtx)
              $ nubOrd
              $ findStoreHashes (storeDir baseCtx) contents
      unless (null storePaths) $ do
        createDirectoryIfMissing True $ target </> "roots"
        createFileLink source $ target </> "source"
        traverse_ (linkPath $ target </> "roots") storePaths

linkPath :: FilePath -> (Hash, FilePath) -> IO ()
linkPath targetDir (hashed, storePath) = do
  putStrLn $ "Pinning " <> storePath
  -- The link is guaranteed (modulo interleaving) not to exist because of the
  -- clean phase
  void $ readProcess "nix-store"
    [ "--quiet"
    , "--realise"
    , "--add-root", targetDir </> hashed
    , storePath ]
    mempty
  -- TODO https://github.com/haskell/directory/issues/155
  --      unix has `modificationTime <$> getSymbolicLinkStatus _`
  fileMod <- liftIO $ getModificationTime source
  when (fileMod > lastExecution (initialDb pinCtx)) $ do
    isLink <- liftIO $ pathIsSymbolicLink source
    -- TODO print a warning if the store path does not exist, and point to the recover command
    hashes <- filter (hashExists (storeContents pinCtx)) <$> if isLink
      then do
        linkTarget <- liftIO $ getSymbolicLinkTarget source
        pure $ maybeToList $ hashFromPath (storeDir baseCtx) linkTarget
      else do
        -- TODO check that it's a normal file (could be a socket or other special file)
        --      unix has isRegularFile
        contents <- liftIO $ LBS.readFile source
        pure $ nubOrd $ findStoreHashes (storeDir baseCtx) contents
    forM_ hashes $ \hash -> modify' $ BMM.insert source hash

linkHash :: Hash -> PinM ()
linkHash hash = do
  (_, pinCtx) <- ask
  case lookupHash (storeContents pinCtx) hash of
    -- TODO make type-safe, for example by using Bimap.Extended and adding the
    -- path there or by adding a (Set (Hash, storePath :: FilePath)) of stuff
    -- to link to the state. Or sqlite.
    Nothing -> error $ "impossible: store path with hash " <> hash <> " disappeared"
    Just storePath -> liftIO $ do
      putStrLn $ "  Pinning " <> storePath
      -- The link is guaranteed (modulo interleaving) not to exist because of the
      -- clean phase
      addRoot storePath $ gcrootsDir pinCtx </> hash

cleanLinks :: PinM ()
cleanLinks = do
  (_, pinCtx) <- ask
  liftIO $ do
    ls <- listDirectory $ gcrootsDir pinCtx
    forM_ ls $ \entry -> do
      entrySource <- getSymbolicLinkTarget (gcrootsDir pinCtx </> entry </> "source")
      shouldRemove <- do
        sourceExists <- doesFileExist entrySource
        if sourceExists
        then do
          entryMod <- getModificationTime entrySource
          pure $ entryMod > lastExecution pinCtx
        else pure True
      when shouldRemove $ removeDirectoryRecursive $ gcrootsDir pinCtx </> entry
  dbDirty <- get
  forM_ (BMM.keysL dbDirty) $ \fp -> do
    shouldRemove <- do
      sourceExists <- liftIO $ doesFileExist fp
      if sourceExists
      then do
        -- TODO https://github.com/haskell/directory/issues/155
        --      unix has `modificationTime <$> getSymbolicLinkStatus _`
        entryMod <- liftIO $ getModificationTime fp
        pure $ entryMod > lastExecution (initialDb pinCtx)
      else pure True
    when shouldRemove $ modify' $ BMM.deleteL fp
  dbClean <- get
  let rootsToDelete = Set.difference (BMM.keysSetR dbDirty) (BMM.keysSetR dbClean)
  liftIO $ forM_ rootsToDelete $ \root -> do
    putStrLn $ "  Removing stale link " <> root
    removeFile $ gcrootsDir pinCtx </> root

A src/Command/Pin/Db.hs => src/Command/Pin/Db.hs +75 -0
@@ 0,0 1,75 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
module Command.Pin.Db where

import Util.Nix

import qualified Data.BimapMany as BMM
import Data.BimapMany (BimapMany)

import Data.Aeson
  ( eitherDecodeFileStrict', encodeFile
  , FromJSON(..), ToJSON(..)
  , withObject, (.:)
  , object, (.=) )
import Data.Time.Clock (UTCTime)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)

import Control.Monad (guard, unless)
import Control.Exception (handleJust)
import System.IO.Error (isDoesNotExistError)
import System.IO (hPutStrLn, stderr)
import System.Exit (exitFailure)

-- MAYBE rename "roots" to "references" throughout the code
type RootsBimap = BimapMany FilePath Hash

data PinDb = PinDb
  -- TODO this works pretty bad when pinning different stuff separately
  -- either
  --   better document that the user has to either
  --     scan everything in a single command or
  --     use different root dirs
  --   or implement a path=>scan_time map or trie (pay attention to the cleanup phase)
  { lastExecution :: UTCTime
  , rootsBimap :: RootsBimap }

-- TODO when we switch to a trie, we can have a Monoid instance
emptyPinDb :: PinDb
emptyPinDb =  PinDb (posixSecondsToUTCTime 0) mempty

currentDbVersion :: Int
currentDbVersion = 1

-- TODO use a more efficient encoding, like `Map a (Set b)` or use indexes
-- instead of strings + an index-string map, and/or switch to sqlite or binary
-- encoding
instance FromJSON PinDb where
  parseJSON = withObject "PinDb" $ \dbObj -> do
    version <- dbObj .: "version" >>= parseJSON
    unless (version == currentDbVersion) $ fail "Database version mismatch. Sorry, you'll have to recreate it!"
    lastExecution <- dbObj .: "last-execution" >>= parseJSON
    roots <- dbObj .: "roots"
    rootsBimap <- BMM.fromList <$> parseJSON roots
    pure $ PinDb {lastExecution, rootsBimap}

instance ToJSON PinDb where
  toJSON PinDb {lastExecution, rootsBimap} = object
    [ "version" .= toJSON currentDbVersion
    , "last-execution" .= toJSON lastExecution
    , "roots" .= toJSON (BMM.toList rootsBimap) ]
  -- TODO toEncoding

readDb :: FilePath -> IO PinDb
readDb dbPath = handleJust (guard . isDoesNotExistError) (const $ pure emptyPinDb) $ do
  eitherDecoded <- eitherDecodeFileStrict' dbPath
  case eitherDecoded of
    -- TODO handle this properly
    Left err -> do
      hPutStrLn stderr $ "Error decoding db: " <> err
      hPutStrLn stderr $ "Try deleting " <> dbPath <> " and retry"
      exitFailure
    Right db -> pure db

writeDb :: FilePath -> PinDb -> IO ()
writeDb = encodeFile

M src/Command/Pin/Options.hs => src/Command/Pin/Options.hs +1 -1
@@ 16,7 16,7 @@ pinOptions = PinOptions
      ( long "gcroots-path"
     <> hidden
     <> metavar "PATH"
     <> help "Where to put the garbage collection roots (must be in nix's gcroots directory)" )
     <> help "Where to put the garbage collection roots" )
  <*> switch
      ( long "full-scan"
     <> help "Perform a full scan, ignoring modification times" )

M src/Util/Nix.hs => src/Util/Nix.hs +18 -4
@@ 1,6 1,7 @@
{-# LANGUAGE TupleSections #-}
module Util.Nix where

import Control.Monad (void)

import qualified Data.Map as Map
import Data.Map (Map)



@@ 16,6 17,9 @@ import Data.ByteString.Lazy.Search (split)
import System.FilePath ((</>), addTrailingPathSeparator, isPathSeparator)
import System.Directory (listDirectory)

import System.Process (readProcess)


type Hash = String

type StoreContents = Map Hash FilePath


@@ 43,11 47,21 @@ hashFromPath storePath path = do
  then pure hashCandidate
  else Nothing

lookupHash :: StoreContents -> Hash -> Maybe (Hash, FilePath)
lookupHash storeContents hash =
  (hash,) <$> Map.lookup hash storeContents
lookupHash :: StoreContents -> Hash -> Maybe FilePath
lookupHash storeContents hash = Map.lookup hash storeContents

hashExists :: StoreContents -> Hash -> Bool
hashExists = flip Map.member

getStoreContents :: FilePath -> IO StoreContents
getStoreContents storePath =
  Map.fromList . fmap (\path -> (take 32 path, storePath </> path))
  <$> listDirectory storePath

addRoot :: FilePath -> FilePath -> IO ()
addRoot storePath path = void $ readProcess "nix-store"
  [ "--quiet"
  , "--realise"
  , "--add-root", path
  , storePath ]
  mempty