From 5b1a675b94deef7741d2fa3f4c619ce3634bfa4d Mon Sep 17 00:00:00 2001 From: Jasper Van der Jeugt Date: Sat, 29 Dec 2012 10:41:05 +0100 Subject: [PATCH] Draft tool to check internal links --- hakyll.cabal | 1 + src/Hakyll/Check.hs | 106 +++++++++++++++++++++++++++++++++++++ src/Hakyll/Core/Logger.hs | 6 +-- src/Hakyll/Core/Runtime.hs | 2 +- src/Hakyll/Main.hs | 8 +++ src/Hakyll/Web/Html.hs | 32 ++++++----- tests/TestSuite/Util.hs | 2 +- 7 files changed, 140 insertions(+), 17 deletions(-) create mode 100644 src/Hakyll/Check.hs diff --git a/hakyll.cabal b/hakyll.cabal index f24aae5..b408c76 100644 --- a/hakyll.cabal +++ b/hakyll.cabal @@ -138,6 +138,7 @@ Library Hakyll.Web.Template.Read Other-Modules: + Hakyll.Check Hakyll.Core.Compiler.Internal Hakyll.Core.Compiler.Require Hakyll.Core.Item.SomeItem diff --git a/src/Hakyll/Check.hs b/src/Hakyll/Check.hs new file mode 100644 index 0000000..550348b --- /dev/null +++ b/src/Hakyll/Check.hs @@ -0,0 +1,106 @@ +-------------------------------------------------------------------------------- +module Hakyll.Check + ( runCheck + ) where + + +-------------------------------------------------------------------------------- +import Control.Applicative ((<$>)) +import Control.Monad (forM_) +import Control.Monad.Reader (ReaderT, ask, runReaderT) +import Control.Monad.Trans (liftIO) +import Control.Monad.Writer (WriterT, runWriterT, tell) +import Data.List (isPrefixOf) +import Data.Monoid (Monoid (..)) +import System.Directory (doesFileExist) +import System.FilePath (takeDirectory, takeExtension, ()) +import qualified Text.HTML.TagSoup as TS + + +-------------------------------------------------------------------------------- +import Hakyll.Core.Configuration +import Hakyll.Core.Logger (Logger) +import qualified Hakyll.Core.Logger as Logger +import Hakyll.Core.Util.File +import Hakyll.Web.Html + + +-------------------------------------------------------------------------------- +data CheckerRead = CheckerRead + { checkerConfig :: Configuration + , checkerLogger :: Logger + } + + +-------------------------------------------------------------------------------- +data CheckerWrite = CheckerWrite + { checkerFaulty :: Int + , checkerOk :: Int + } + + +-------------------------------------------------------------------------------- +instance Monoid CheckerWrite where + mempty = CheckerWrite 0 0 + mappend (CheckerWrite f1 o1) (CheckerWrite f2 o2) = + CheckerWrite (f1 + f2) (o1 + o2) + + +-------------------------------------------------------------------------------- +type Checker a = ReaderT CheckerRead (WriterT CheckerWrite IO) a + + +-------------------------------------------------------------------------------- +runCheck :: Configuration -> IO () +runCheck config = do + logger <- Logger.new (verbosity config) + let read' = CheckerRead config logger + ((), _write) <- runWriterT $ runReaderT check read' + Logger.flush logger + + +-------------------------------------------------------------------------------- +check :: Checker () +check = do + config <- checkerConfig <$> ask + files <- liftIO $ getRecursiveContents (destinationDirectory config) + + let htmls = + [ destinationDirectory config file + | file <- files + , takeExtension file == ".html" + ] + + forM_ htmls checkFile + + +-------------------------------------------------------------------------------- +checkFile :: FilePath -> Checker () +checkFile filePath = do + logger <- checkerLogger <$> ask + contents <- liftIO $ readFile filePath + + Logger.header logger $ "Checking " ++ filePath + let tags = TS.parseTags contents + urls = filter (not . isExternal) $ getUrls tags + mapM_ (checkUrl filePath) urls + + +-------------------------------------------------------------------------------- +checkUrl :: FilePath -> String -> Checker () +checkUrl base url = do + logger <- checkerLogger <$> ask + config <- checkerConfig <$> ask + + let dest = destinationDirectory config + dir = takeDirectory base + filePath + | "/" `isPrefixOf` url = dest ++ url + | otherwise = dir url + + exists <- liftIO $ doesFileExist filePath + if exists + then tell $ mempty {checkerOk = 1} + else do + tell $ mempty {checkerFaulty = 1} + Logger.error logger $ base ++ ": broken reference to " ++ url diff --git a/src/Hakyll/Core/Logger.hs b/src/Hakyll/Core/Logger.hs index 17bff32..4731c20 100644 --- a/src/Hakyll/Core/Logger.hs +++ b/src/Hakyll/Core/Logger.hs @@ -42,10 +42,10 @@ data Logger = Logger -------------------------------------------------------------------------------- -- | Create a new logger -new :: Verbosity -> (String -> IO ()) -> IO Logger -new vbty sink = do +new :: Verbosity -> IO Logger +new vbty = do logger <- Logger <$> - newChan <*> newEmptyMVar <*> pure sink <*> pure vbty + newChan <*> newEmptyMVar <*> pure putStrLn <*> pure vbty _ <- forkIO $ loggerThread logger return logger where diff --git a/src/Hakyll/Core/Runtime.hs b/src/Hakyll/Core/Runtime.hs index 2943942..eaa7039 100644 --- a/src/Hakyll/Core/Runtime.hs +++ b/src/Hakyll/Core/Runtime.hs @@ -43,7 +43,7 @@ import Hakyll.Core.Writable run :: Configuration -> Rules a -> IO RuleSet run config rules = do -- Initialization - logger <- Logger.new (verbosity config) putStrLn + logger <- Logger.new (verbosity config) Logger.header logger "Initialising..." Logger.message logger "Creating store..." store <- Store.new (inMemoryCache config) $ storeDirectory config diff --git a/src/Hakyll/Main.hs b/src/Hakyll/Main.hs index 3ead225..edd923a 100644 --- a/src/Hakyll/Main.hs +++ b/src/Hakyll/Main.hs @@ -16,6 +16,7 @@ import System.Process (system) -------------------------------------------------------------------------------- +import Hakyll.Check import Hakyll.Core.Configuration import Hakyll.Core.Rules import Hakyll.Core.Runtime @@ -47,6 +48,7 @@ hakyllWith conf rules = do args <- getArgs case args of ["build"] -> build conf rules + ["check"] -> check conf ["clean"] -> clean conf ["help"] -> help ["preview"] -> preview conf rules 8000 @@ -66,6 +68,12 @@ build conf rules = do return () +-------------------------------------------------------------------------------- +-- | Run the checker +check :: Configuration -> IO () +check = runCheck + + -------------------------------------------------------------------------------- -- | Remove the output directories clean :: Configuration -> IO () diff --git a/src/Hakyll/Web/Html.hs b/src/Hakyll/Web/Html.hs index 3c94b2f..48482e6 100644 --- a/src/Hakyll/Web/Html.hs +++ b/src/Hakyll/Web/Html.hs @@ -8,6 +8,7 @@ module Hakyll.Web.Html , demoteHeaders -- * Url manipulation + , getUrls , withUrls , toUrl , toSiteRoot @@ -20,16 +21,14 @@ module Hakyll.Web.Html -------------------------------------------------------------------------------- -import Data.Char (digitToInt, intToDigit, isDigit, toLower) -import Data.List (isPrefixOf) -import qualified Data.Set as S -import System.FilePath (joinPath, splitPath, takeDirectory) -import Text.Blaze.Html (toHtml) -import Text.Blaze.Html.Renderer.String (renderHtml) - - --------------------------------------------------------------------------------- -import qualified Text.HTML.TagSoup as TS +import Data.Char (digitToInt, intToDigit, + isDigit, toLower) +import Data.List (isPrefixOf) +import System.FilePath (joinPath, splitPath, + takeDirectory) +import Text.Blaze.Html (toHtml) +import Text.Blaze.Html.Renderer.String (renderHtml) +import qualified Text.HTML.TagSoup as TS -------------------------------------------------------------------------------- @@ -52,6 +51,16 @@ demoteHeaders = withTags $ \tag -> case tag of demote t = t +-------------------------------------------------------------------------------- +isUrlAttribute :: String -> Bool +isUrlAttribute = (`elem` ["src", "href"]) + + +-------------------------------------------------------------------------------- +getUrls :: [TS.Tag String] -> [String] +getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k] + + -------------------------------------------------------------------------------- -- | Apply a function to each URL on a webpage withUrls :: (String -> String) -> String -> String @@ -59,8 +68,7 @@ withUrls f = withTags tag where tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a tag x = x - attr (k, v) = (k, if k `S.member` refs then f v else v) - refs = S.fromList ["src", "href"] + attr (k, v) = (k, if isUrlAttribute k then f v else v) -- | Customized TagSoup renderer. (The default TagSoup renderer escape CSS diff --git a/tests/TestSuite/Util.hs b/tests/TestSuite/Util.hs index 7a2f0a7..7000f5d 100644 --- a/tests/TestSuite/Util.hs +++ b/tests/TestSuite/Util.hs @@ -68,7 +68,7 @@ newTestProvider store = newProvider store (const False) "tests/data" testCompiler :: Store -> Provider -> Identifier -> Compiler a -> IO (CompilerResult a) testCompiler store provider underlying compiler = do - logger <- Logger.new Logger.Debug (\_ -> return ()) + logger <- Logger.new Logger.Error let read' = CompilerRead { compilerUnderlying = underlying , compilerProvider = provider