hakyll/src/Hakyll/Check.hs

245 lines
8.6 KiB
Haskell
Raw Normal View History

2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
2013-03-09 16:54:08 +00:00
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
2012-12-29 09:41:05 +00:00
module Hakyll.Check
2012-12-31 14:32:46 +00:00
( Check (..)
, check
2012-12-29 09:41:05 +00:00
) where
--------------------------------------------------------------------------------
import Control.Applicative ((<$>))
import Control.Monad (forM_)
import Control.Monad.Reader (ask)
import Control.Monad.RWS (RWST, runRWST)
2012-12-29 09:41:05 +00:00
import Control.Monad.Trans (liftIO)
import Control.Monad.Writer (tell)
2013-03-09 16:54:08 +00:00
import Data.List (isPrefixOf)
2012-12-29 09:41:05 +00:00
import Data.Monoid (Monoid (..))
import Data.Set (Set)
import qualified Data.Set as S
2012-12-29 10:36:53 +00:00
import System.Directory (doesDirectoryExist, doesFileExist)
import System.Exit (ExitCode (..))
2012-12-29 09:41:05 +00:00
import System.FilePath (takeDirectory, takeExtension, (</>))
import qualified Text.HTML.TagSoup as TS
--------------------------------------------------------------------------------
2013-03-09 16:54:08 +00:00
#ifdef CHECK_EXTERNAL
import Control.Exception (AsyncException (..),
SomeException (..), handle, throw)
import Control.Monad.State (get, modify)
import Data.List (intercalate)
import Data.Typeable (cast)
import Data.Version (versionBranch)
2013-03-09 16:54:08 +00:00
import GHC.Exts (fromString)
import qualified Network.HTTP.Conduit as Http
import qualified Network.HTTP.Types as Http
import qualified Paths_hakyll as Paths_hakyll
#endif
--------------------------------------------------------------------------------
2012-12-29 09:41:05 +00:00
import Hakyll.Core.Configuration
2012-12-31 14:16:14 +00:00
import Hakyll.Core.Logger (Logger, Verbosity)
2012-12-29 09:41:05 +00:00
import qualified Hakyll.Core.Logger as Logger
import Hakyll.Core.Util.File
import Hakyll.Web.Html
--------------------------------------------------------------------------------
2012-12-31 14:32:46 +00:00
data Check = All | InternalLinks
deriving (Eq, Ord, Show)
--------------------------------------------------------------------------------
check :: Configuration -> Verbosity -> Check -> IO ExitCode
check config verbosity check' = do
((), write) <- runChecker checkDestination config verbosity check'
return $ if checkerFaulty write >= 0 then ExitFailure 1 else ExitSuccess
2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
data CheckerRead = CheckerRead
{ checkerConfig :: Configuration
, checkerLogger :: Logger
2012-12-31 14:32:46 +00:00
, checkerCheck :: Check
2012-12-29 09:41:05 +00:00
}
--------------------------------------------------------------------------------
data CheckerWrite = CheckerWrite
{ checkerFaulty :: Int
, checkerOk :: Int
2012-12-29 10:36:53 +00:00
} deriving (Show)
2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
instance Monoid CheckerWrite where
mempty = CheckerWrite 0 0
mappend (CheckerWrite f1 o1) (CheckerWrite f2 o2) =
CheckerWrite (f1 + f2) (o1 + o2)
--------------------------------------------------------------------------------
type CheckerState = Set String
2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
type Checker a = RWST CheckerRead CheckerWrite CheckerState IO a
--------------------------------------------------------------------------------
2012-12-31 14:32:46 +00:00
runChecker :: Checker a -> Configuration -> Verbosity -> Check
-> IO (a, CheckerWrite)
runChecker checker config verbosity check' = do
2012-12-31 14:16:14 +00:00
logger <- Logger.new verbosity
2013-03-09 16:54:08 +00:00
let read' = CheckerRead
{ checkerConfig = config
, checkerLogger = logger
, checkerCheck = check'
}
(x, _, write) <- runRWST checker read' S.empty
2012-12-29 09:41:05 +00:00
Logger.flush logger
return (x, write)
2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
checkDestination :: Checker ()
checkDestination = do
2012-12-29 09:41:05 +00:00
config <- checkerConfig <$> ask
files <- liftIO $ getRecursiveContents
(const $ return False) (destinationDirectory config)
2012-12-29 09:41:05 +00:00
let htmls =
[ destinationDirectory config </> file
| file <- files
, takeExtension file == ".html"
]
forM_ htmls checkFile
--------------------------------------------------------------------------------
checkFile :: FilePath -> Checker ()
checkFile filePath = do
logger <- checkerLogger <$> ask
contents <- liftIO $ readFile filePath
Logger.header logger $ "Checking file " ++ filePath
2012-12-29 10:36:53 +00:00
2012-12-29 11:30:23 +00:00
let urls = getUrls $ TS.parseTags contents
forM_ urls $ \url -> do
Logger.debug logger $ "Checking link " ++ url
2013-03-27 13:02:56 +00:00
checkUrl filePath url
--------------------------------------------------------------------------------
checkUrl :: FilePath -> String -> Checker ()
checkUrl filePath url
| isExternal url = checkExternalUrl url
| hasProtocol url = skip "Unknown protocol, skipping"
| otherwise = checkInternalUrl filePath url
where
validProtoChars = ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "+-."
hasProtocol str = case break (== ':') str of
(proto, ':' : _) -> all (`elem` validProtoChars) proto
_ -> False
2012-12-29 11:30:23 +00:00
2012-12-29 16:49:11 +00:00
--------------------------------------------------------------------------------
ok :: String -> Checker ()
ok _ = tell $ mempty {checkerOk = 1}
--------------------------------------------------------------------------------
skip :: String -> Checker ()
skip reason = do
logger <- checkerLogger <$> ask
Logger.debug logger $ reason
tell $ mempty {checkerOk = 1}
2012-12-29 16:49:11 +00:00
--------------------------------------------------------------------------------
faulty :: String -> Checker ()
faulty url = do
logger <- checkerLogger <$> ask
Logger.error logger $ "Broken link to " ++ show url
tell $ mempty {checkerFaulty = 1}
2012-12-29 11:30:23 +00:00
--------------------------------------------------------------------------------
checkInternalUrl :: FilePath -> String -> Checker ()
checkInternalUrl base url = case url' of
2012-12-29 16:49:11 +00:00
"" -> ok url
2012-12-29 11:30:23 +00:00
_ -> do
config <- checkerConfig <$> ask
let dest = destinationDirectory config
dir = takeDirectory base
filePath
| "/" `isPrefixOf` url' = dest ++ url'
| otherwise = dir </> url'
exists <- checkFileExists filePath
2012-12-29 16:49:11 +00:00
if exists then ok url else faulty url
2012-12-29 11:30:23 +00:00
where
url' = stripFragments url
2012-12-29 09:41:05 +00:00
--------------------------------------------------------------------------------
2012-12-29 11:30:23 +00:00
checkExternalUrl :: String -> Checker ()
2013-03-09 16:54:08 +00:00
#ifdef CHECK_EXTERNAL
2012-12-29 11:30:23 +00:00
checkExternalUrl url = do
2012-12-31 14:32:46 +00:00
logger <- checkerLogger <$> ask
needsCheck <- (== All) . checkerCheck <$> ask
checked <- (url `S.member`) <$> get
2012-12-31 14:32:46 +00:00
if not needsCheck || checked
then Logger.debug logger "Already checked, skipping"
else do
isOk <- liftIO $ handle (failure logger) $
Http.withManager $ \mgr -> do
request <- Http.parseUrl url
response <- Http.http (settings request) mgr
let code = Http.statusCode (Http.responseStatus response)
return $ code >= 200 && code < 300
modify $ S.insert url
if isOk then ok url else faulty url
2012-12-29 16:49:11 +00:00
where
-- Add additional request info
2012-12-29 16:49:11 +00:00
settings r = r
{ Http.method = "HEAD"
, Http.redirectCount = 10
, Http.requestHeaders = ("User-Agent", ua) : Http.requestHeaders r
2012-12-29 16:49:11 +00:00
}
-- Nice user agent info
ua = fromString $ "hakyll-check/" ++
(intercalate "." $ map show $ versionBranch $ Paths_hakyll.version)
-- Catch all the things except UserInterrupt
failure logger (SomeException e) = case cast e of
Just UserInterrupt -> throw UserInterrupt
_ -> Logger.error logger (show e) >> return False
2013-03-09 16:54:08 +00:00
#else
checkExternalUrl _ = return ()
#endif
2012-12-29 10:36:53 +00:00
--------------------------------------------------------------------------------
-- | Wraps doesFileExist, also checks for index.html
checkFileExists :: FilePath -> Checker Bool
checkFileExists filePath = liftIO $ do
file <- doesFileExist filePath
dir <- doesDirectoryExist filePath
case (file, dir) of
(True, _) -> return True
(_, True) -> doesFileExist $ filePath </> "index.html"
_ -> return False
--------------------------------------------------------------------------------
stripFragments :: String -> String
stripFragments = takeWhile (not . flip elem ['?', '#'])