stackage-server/Stackage/Database/Cron.hs
Michael Snoyman a030ba4afb LTS bump
2016-05-02 14:16:11 +03:00

323 lines
12 KiB
Haskell

module Stackage.Database.Cron
( stackageServerCron
, loadFromS3
, getHoogleDB
) where
import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar
import Stackage.Database
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename, removeTree, removeFile)
import Web.PathPieces (toPathPiece)
import Filesystem (isFile, createTree)
import Filesystem.Path.CurrentOS (parent, fromText, encodeString)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover),
Region (NorthVirginia), newEnv,
send, chunkedFile, defaultChunkSize,
envManager, runAWS)
import Control.Monad.Trans.AWS (trying, _Error)
import Network.AWS.Data.Body (toBody)
import Network.AWS.S3 (ObjectCannedACL (OPublicRead),
poACL, putObject,
BucketName(BucketName),
ObjectKey(ObjectKey))
import Control.Lens (set, view)
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Zlib (WindowBits (WindowBits),
compress, ungzip)
import qualified Hoogle
import System.Directory (doesFileExist)
filename' :: Text
filename' = concat
[ "stackage-database-"
, tshow currentSchema
, ".sqlite3"
]
keyName :: Text
keyName = "stackage-database/" ++ filename'
url :: Text
url = concat
[ "https://s3.amazonaws.com/haddock.stackage.org/"
, keyName
]
-- | Provides an action to be used to refresh the file from S3.
loadFromS3 :: Bool -- ^ devel mode? if True, won't delete old databases, and won't refresh them either
-> Manager -> IO (IO StackageDatabase, IO ())
loadFromS3 develMode man = do
killPrevVar <- newTVarIO $ return ()
currSuffixVar <- newTVarIO (1 :: Int)
let root = "stackage-database"
unless develMode $ handleIO print $ removeTree root
createTree root
req <- parseUrl $ unpack url
let download = do
suffix <- atomically $ do
x <- readTVar currSuffixVar
writeTVar currSuffixVar $! x + 1
return x
let fp = root </> unpack ("database-download-" ++ tshow suffix)
isInitial = suffix == 1
toSkip <-
if isInitial
then do
putStrLn $ "Checking if database exists: " ++ tshow fp
doesFileExist fp
else return False
if toSkip
then putStrLn "Skipping initial database download"
else do
putStrLn $ "Downloading database to " ++ pack fp
withResponse req man $ \res ->
runResourceT
$ bodyReaderSource (responseBody res)
$= ungzip
$$ sinkFile fp
putStrLn "Finished downloading database"
return fp
dbvar <- newTVarIO $ error "database not yet loaded"
let update = do
fp <- download
db <- openStackageDatabase (fromString fp) `onException` removeFile (fromString fp)
void $ tryIO $ join $ atomically $ do
writeTVar dbvar db
oldKill <- readTVar killPrevVar
writeTVar killPrevVar $ do
-- give existing users a chance to clean up
threadDelay $ 1000000 * 30
void $ tryIO $ removeFile (fromString fp)
return oldKill
update
return (readTVarIO dbvar, unless develMode update)
hoogleKey :: SnapName -> Text
hoogleKey name = concat
[ "hoogle/"
, toPathPiece name
, "/"
, VERSION_hoogle
, ".hoo"
]
hoogleUrl :: SnapName -> Text
hoogleUrl n = concat
[ "https://s3.amazonaws.com/haddock.stackage.org/"
, hoogleKey n
]
getHoogleDB :: Bool -- ^ print exceptions?
-> Manager -> SnapName -> IO (Maybe FilePath)
getHoogleDB toPrint man name = do
let fp = fromText $ hoogleKey name
fptmp = encodeString fp <.> "tmp"
exists <- isFile fp
if exists
then return $ Just (encodeString fp)
else do
req' <- parseUrl $ unpack $ hoogleUrl name
let req = req'
{ checkStatus = \_ _ _ -> Nothing
, decompress = const False
}
withResponse req man $ \res -> if responseStatus res == status200
then do
createTree $ parent (fromString fptmp)
runResourceT $ bodyReaderSource (responseBody res)
$= ungzip
$$ sinkFile fptmp
rename (fromString fptmp) fp
return $ Just $ encodeString fp
else do
when toPrint $ mapM brRead res >>= print
return Nothing
stackageServerCron :: IO ()
stackageServerCron = do
-- Hacky approach instead of PID files
void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
error $ "cabal loader process already running, exiting"
env <- newEnv NorthVirginia Discover
let upload :: FilePath -> ObjectKey -> IO ()
upload fp key = do
let fpgz = fp <.> "gz"
runResourceT $ sourceFile fp
$$ compress 9 (WindowBits 31)
=$ CB.sinkFile fpgz
body <- chunkedFile defaultChunkSize fpgz
let po =
set poACL (Just OPublicRead)
$ putObject "haddock.stackage.org" key body
putStrLn $ "Uploading: " ++ tshow key
eres <- runResourceT $ runAWS env $ trying _Error $ send po
case eres of
Left e -> error $ show (fp, key, e)
Right _ -> putStrLn "Success"
let dbfp = fromText keyName
createStackageDatabase dbfp
upload (encodeString dbfp) (ObjectKey keyName)
db <- openStackageDatabase dbfp
do
snapshots <- runReaderT snapshotsJSON db
let key = ObjectKey "snapshots.json"
po =
set poACL (Just OPublicRead)
$ putObject (BucketName "haddock.stackage.org") key (toBody snapshots)
putStrLn $ "Uploading: " ++ tshow key
eres <- runResourceT $ runAWS env $ trying _Error $ send po
case eres of
Left e -> error $ show (key, e)
Right _ -> putStrLn "Success"
names <- runReaderT last5Lts5Nightly db
let manager = view envManager env
forM_ names $ \name -> do
mfp <- getHoogleDB False manager name
case mfp of
Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name
Nothing -> do
mfp' <- createHoogleDB db manager name
forM_ mfp' $ \fp -> do
let key = hoogleKey name
upload fp (ObjectKey key)
let dest = unpack key
createTree $ parent (fromString dest)
rename (fromString fp) (fromString dest)
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
req' <- parseUrl $ unpack tarUrl
let req = req' { decompress = const True }
unlessM (isFile (fromString tarFP)) $ withResponse req man $ \res -> do
let tmp = tarFP <.> "tmp"
createTree $ parent (fromString tmp)
runResourceT $ bodyReaderSource (responseBody res)
$$ sinkFile tmp
rename (fromString tmp) (fromString tarFP)
void $ tryIO $ removeTree (fromString bindir)
void $ tryIO $ removeFile (fromString outname)
createTree (fromString bindir)
dbs <- runResourceT
$ sourceTarFile False tarFP
$$ evalStateC 1 (mapMC (singleDB db name bindir))
=$ sinkList
putStrLn "Merging databases..."
Hoogle.mergeDatabase (catMaybes dbs) outname
putStrLn "Merge done"
return $ Just outname
where
root = "hoogle-gen"
bindir = root </> "bindir"
outname = root </> "output.hoo"
tarKey = toPathPiece name ++ "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey
tarFP = root </> unpack tarKey
singleDB :: StackageDatabase
-> SnapName
-> FilePath -- ^ bindir to write to
-> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath)
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg
case msp of
Nothing -> do
putStrLn $ "Unknown: " ++ pkg
return Nothing
Just (Entity _ sp) -> do
let ver = snapshotPackageVersion sp
pkgver = concat [pkg, "-", ver]
out = bindir </> show idx <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
docsUrl = concat
[ "https://www.stackage.org/haddock/"
, toPathPiece sname
, "/"
, pkgver
, "/index.html"
]
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' out
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]