2019-01-31 02:54:47 +00:00
|
|
|
module Handler.Archive where
|
|
|
|
|
|
|
|
import Import
|
|
|
|
import Data.Function ((&))
|
2020-01-19 06:47:53 +00:00
|
|
|
import Data.Char (chr)
|
|
|
|
import qualified Data.Attoparsec.ByteString.Char8 as AP8
|
|
|
|
import qualified Data.Attoparsec.ByteString as AP
|
2019-01-31 02:54:47 +00:00
|
|
|
import qualified Data.ByteString as BS
|
|
|
|
import qualified Data.ByteString.Lazy as LBS
|
|
|
|
import qualified Data.ByteString.Char8 as BS8
|
|
|
|
import qualified Network.HTTP.Client as NH
|
|
|
|
import qualified Network.HTTP.Client.TLS as NH
|
|
|
|
import qualified Network.HTTP.Types.Status as NH
|
|
|
|
import qualified Web.FormUrlEncoded as WH
|
|
|
|
import qualified Control.Monad.Metrics as MM
|
2020-01-19 06:47:53 +00:00
|
|
|
import HTMLEntities.Decoder (htmlEncodedText)
|
|
|
|
import Data.Text.Lazy.Builder (toLazyText)
|
2019-01-31 02:54:47 +00:00
|
|
|
|
|
|
|
shouldArchiveBookmark :: User -> Key Bookmark -> Handler Bool
|
|
|
|
shouldArchiveBookmark user kbid = do
|
|
|
|
runDB (get kbid) >>= \case
|
|
|
|
Nothing -> pure False
|
|
|
|
Just bm -> do
|
|
|
|
pure $
|
|
|
|
(isNothing $ bookmarkArchiveHref bm) &&
|
|
|
|
(bookmarkShared bm)
|
|
|
|
&& not (_isArchiveBlacklisted bm)
|
|
|
|
&& not (userPrivacyLock user)
|
|
|
|
&& userArchiveDefault user
|
|
|
|
|
|
|
|
archiveBookmarkUrl :: Key Bookmark -> String -> Handler ()
|
|
|
|
archiveBookmarkUrl kbid url =
|
|
|
|
(_fetchArchiveSubmitInfo >>= \case
|
|
|
|
Left e -> do
|
|
|
|
MM.increment "archive.fetchSubmitId_noparse"
|
|
|
|
$(logError) (pack e)
|
|
|
|
Right submitInfo -> do
|
|
|
|
userId <- requireAuthId
|
|
|
|
let req = _buildArchiveSubmitRequest submitInfo url
|
|
|
|
MM.increment "archive.submit"
|
|
|
|
res <- liftIO $ NH.httpLbs req =<< NH.getGlobalManager
|
|
|
|
let status = NH.responseStatus res
|
|
|
|
MM.increment ("archive.submit_status_" <> (pack.show) (NH.statusCode status))
|
|
|
|
let updateArchiveUrl = runDB . updateBookmarkArchiveUrl userId kbid . Just
|
|
|
|
headers = NH.responseHeaders res
|
|
|
|
case status of
|
|
|
|
s | s == NH.status200 ->
|
|
|
|
for_ (lookup "Refresh" headers >>= _parseRefreshHeaderUrl) updateArchiveUrl
|
2019-10-27 21:01:10 +00:00
|
|
|
s | s == NH.status302 || s == NH.status307 ->
|
2019-01-31 02:54:47 +00:00
|
|
|
for_ (lookup "Location" headers) (updateArchiveUrl . decodeUtf8)
|
|
|
|
_ -> $(logError) (pack (show res)))
|
|
|
|
`catch` (\(e::SomeException) -> ($(logError) $ (pack.show) e) >> throwIO e)
|
|
|
|
|
|
|
|
_isArchiveBlacklisted :: Bookmark -> Bool
|
|
|
|
_isArchiveBlacklisted (Bookmark {..}) =
|
|
|
|
[ "hulu"
|
|
|
|
, "livestream"
|
|
|
|
, "netflix"
|
|
|
|
, "skillsmatter"
|
|
|
|
, "twitch.tv"
|
|
|
|
, "vimeo"
|
|
|
|
, "youtu.be"
|
|
|
|
, "youtube"
|
|
|
|
, "archive."
|
|
|
|
] &
|
|
|
|
any (`isInfixOf` bookmarkHref)
|
|
|
|
|
|
|
|
_parseRefreshHeaderUrl :: ByteString -> Maybe Text
|
|
|
|
_parseRefreshHeaderUrl h = do
|
|
|
|
let u = BS8.drop 1 $ BS8.dropWhile (/= '=') h
|
|
|
|
if (not (null u))
|
|
|
|
then Just $ decodeUtf8 u
|
|
|
|
else Nothing
|
|
|
|
|
|
|
|
_buildArchiveSubmitRequest :: (String, String) -> String -> NH.Request
|
|
|
|
_buildArchiveSubmitRequest (action, submitId) href =
|
|
|
|
NH.parseRequest_ ("POST " <> action) & \r ->
|
|
|
|
r { NH.requestHeaders =
|
|
|
|
[ ("User-Agent", _archiveUserAgent)
|
|
|
|
, ("Content-Type", "application/x-www-form-urlencoded")
|
|
|
|
]
|
|
|
|
, NH.requestBody = NH.RequestBodyLBS $ WH.urlEncodeAsForm ((
|
|
|
|
[ ("submitid" , submitId)
|
|
|
|
, ("url", href)
|
|
|
|
]) :: [(String, String)])
|
|
|
|
, NH.redirectCount = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
_fetchArchiveSubmitInfo :: Handler (Either String (String , String))
|
|
|
|
_fetchArchiveSubmitInfo = do
|
|
|
|
MM.increment "archive.fetchSubmitId"
|
2020-01-18 20:27:52 +00:00
|
|
|
res <- liftIO $ NH.httpLbs (buildSimpleRequest "https://archive.li/") =<< NH.getGlobalManager
|
2019-01-31 02:54:47 +00:00
|
|
|
MM.increment ("archive.fetchSubmitId_status_" <> (pack.show) (NH.statusCode (NH.responseStatus res)))
|
|
|
|
let body = LBS.toStrict (responseBody res)
|
2020-01-19 06:47:53 +00:00
|
|
|
action = _parseSubstring (AP8.string "action=\"") (AP8.notChar '"') body
|
|
|
|
submitId = _parseSubstring (AP8.string "submitid\" value=\"") (AP8.notChar '"') body
|
2019-01-31 02:54:47 +00:00
|
|
|
pure $ (,) <$> action <*> submitId
|
|
|
|
|
|
|
|
_archiveUserAgent :: ByteString
|
|
|
|
_archiveUserAgent = "espial"
|
|
|
|
|
2020-01-19 06:47:53 +00:00
|
|
|
_parseSubstring :: AP8.Parser ByteString -> AP8.Parser Char -> BS.ByteString -> Either String String
|
2019-01-31 02:54:47 +00:00
|
|
|
_parseSubstring start inner res = do
|
2020-01-19 06:47:53 +00:00
|
|
|
(flip AP8.parseOnly) res (skipAnyTill start >> AP8.many1 inner)
|
2019-01-31 02:54:47 +00:00
|
|
|
where
|
2020-01-19 06:47:53 +00:00
|
|
|
skipAnyTill end = go where go = end *> pure () <|> AP8.anyChar *> go
|
2020-01-18 20:27:52 +00:00
|
|
|
|
|
|
|
|
2020-01-19 06:47:53 +00:00
|
|
|
fetchPageTitle :: String -> Handler (Either String Text)
|
|
|
|
fetchPageTitle url =
|
|
|
|
do MM.increment "fetchPageTitle"
|
|
|
|
res <- liftIO $ NH.httpLbs (buildSimpleRequest url) =<< NH.getGlobalManager
|
|
|
|
let body = LBS.toStrict (responseBody res)
|
|
|
|
pure (decodeHtmlBs <$> parseTitle body)
|
|
|
|
`catch` (\(e :: SomeException) -> do
|
|
|
|
MM.increment "fetchPageTitle.error"
|
|
|
|
$(logError) $ (pack . show) e
|
|
|
|
pure (Left (show e)))
|
2020-01-18 20:27:52 +00:00
|
|
|
where
|
2020-01-19 06:47:53 +00:00
|
|
|
parseTitle bs =
|
|
|
|
(flip AP.parseOnly) bs $ do
|
|
|
|
_ <- skipAnyTill (AP.string "<title")
|
|
|
|
_ <- skipAnyTill (AP.string ">")
|
|
|
|
AP.takeTill (\w -> chr (fromEnum w) == '<')
|
|
|
|
decodeHtmlBs = toStrict . toLazyText . htmlEncodedText . decodeUtf8
|
|
|
|
skipAnyTill end = go where go = end *> pure () <|> AP.anyWord8 *> go
|
2020-01-18 20:27:52 +00:00
|
|
|
|
|
|
|
buildSimpleRequest :: String -> Request
|
|
|
|
buildSimpleRequest url =
|
|
|
|
NH.parseRequest_ url & \r ->
|
|
|
|
r {NH.requestHeaders = [("User-Agent", _archiveUserAgent)]}
|