diff --git a/human-friendly-id-gen.cabal b/human-friendly-id-gen.cabal index fc55933..6f50a0b 100644 --- a/human-friendly-id-gen.cabal +++ b/human-friendly-id-gen.cabal @@ -2,7 +2,7 @@ -- -- see: https://github.com/sol/hpack -- --- hash: c5f958a044b04a3b5795efc86fa4cd7fd5ef7fd9f515f9e483f366db6991f1ec +-- hash: 9c3e37ab9a4ab0be5cfdc76a03c0f8a4f3927508f29397d5d7cf5ebb45fa74b3 name: human-friendly-id-gen version: 0.1.0.0 @@ -28,8 +28,9 @@ data-files: library exposed-modules: - IDGen.Dictionary - IDGen.Short + HFIG.Dictionary + HFIG.Lovecraftian + HFIG.Short Lib other-modules: Paths_human_friendly_id_gen @@ -58,6 +59,7 @@ executable human-friendly-id-gen-exe base >=4.8 && <5 , human-friendly-id-gen , protolude + , turtle default-language: Haskell2010 test-suite human-friendly-id-gen-doctest diff --git a/package.yaml b/package.yaml index 6039635..0961c04 100644 --- a/package.yaml +++ b/package.yaml @@ -44,6 +44,7 @@ executables: - -with-rtsopts=-N dependencies: - human-friendly-id-gen + - turtle tests: human-friendly-id-gen-doctest: main: Main.hs diff --git a/src-exe/Main.hs b/src-exe/Main.hs index a79c02f..1baaeb4 100644 --- a/src-exe/Main.hs +++ b/src-exe/Main.hs @@ -1,6 +1,47 @@ -import Protolude +module Main where -import IDGen.Short (idgen) +import Protolude hiding (FilePath, die) + +import Turtle + +import qualified HFIG.Dictionary as Dict +import qualified HFIG.Lovecraftian as Lovecraftian +import qualified HFIG.Short as Short main :: IO () -main = idgen 8 >>= putText +main = do + opts <- options "Generate Human Friendly identifiers" optParser + case genMode opts of + Short -> Short.idgen 8 >>= putText + Lovecraftian -> Lovecraftian.idgen 2 >>= putText + Dict -> do + let file = case optDict opts of + Just "english" -> "dictionaries/english.txt" + Just "first-names" -> "dictionaries/first-names.txt" + Just "generic" -> "dictionaries/generic.txt" + Just "literary" -> "dictionaries/literary.txt" + Just filepath -> toS (format fp filepath) + Nothing -> "dictionaries/english.txt" + dict <- Dict.dictionaryFromFile file + Dict.idgen dict (fromMaybe 3 (optLen opts)) >>= putText + +-- Option parsing +data GenMode = Short | Lovecraftian | Dict deriving (Eq) +data AppOptions = AppOptions { genMode :: GenMode + , optDict :: Maybe FilePath + , optLen :: Maybe Int + } + +optParser :: Parser AppOptions +optParser = AppOptions + <$> (fromMaybe Lovecraftian <$> optional (opt toGenMode "gen" 'g' "Possible values: short, dict")) + <*> optional (optPath "dict" 'd' "dictionary (predefined names are: english, first-names, generic, literary) or use any file path (one word per line)") + <*> optional (optInt "len" 'n' "complexity depends on the gen chosen") + +toGenMode :: Text -> Maybe GenMode +toGenMode "s" = Just Short +toGenMode "short" = Just Short +toGenMode "d" = Just Dict +toGenMode "dict" = Just Dict +toGenMode "dictionary" = Just Dict +toGenMode _ = Nothing diff --git a/src-test/Main.hs b/src-test/Main.hs index 34e768b..3a9fe0f 100644 --- a/src-test/Main.hs +++ b/src-test/Main.hs @@ -4,7 +4,8 @@ import Test.Tasty import Test.Tasty.HUnit import Test.Tasty.SmallCheck -import Lib (inc) +inc :: Int -> Int +inc = (+1) main :: IO () main = defaultMain $ testGroup "all-tests" tests diff --git a/src/IDGen/Dictionary.hs b/src/HFIG/Dictionary.hs similarity index 86% rename from src/IDGen/Dictionary.hs rename to src/HFIG/Dictionary.hs index 802deef..d5339fb 100644 --- a/src/IDGen/Dictionary.hs +++ b/src/HFIG/Dictionary.hs @@ -1,5 +1,5 @@ {-| -module : IDGen.Dictionary +module : HFIG.Dictionary Description : Dictionary strategy to generate random human friendly ids License : Public Domain Maintainer : yann.esposito@gmail.com @@ -10,8 +10,9 @@ when talking about them. Yet not the best for preventing collision. -} -module IDGen.Dictionary +module HFIG.Dictionary ( idgen + , dictionaryFromFile , collisionProbability ) where @@ -23,12 +24,12 @@ import qualified Control.Monad.Primitive as Prim import qualified Data.Vector as V import qualified Data.Text as T +type Dictionary = V.Vector Text -- | Will generate readable short names The integer parameter determine the -- length in number of syllabus of the name -idgen :: Int -> FilePath -> IO Text -idgen n dictName = do - allwords <- words dictName +idgen :: Dictionary -> Int -> IO Text +idgen allwords n = Random.withSystemRandom $ \gen -> T.intercalate "-" <$> replicateM n (genWord gen allwords) @@ -65,5 +66,5 @@ genWord gen allwords = do (k :: Int) <- Random.uniformR (0, V.length allwords - 1) gen return (allwords V.! k) -words :: FilePath -> IO (V.Vector Text) -words dictName = (V.fromList . T.lines) <$> readFile dictName +dictionaryFromFile :: FilePath -> IO (V.Vector Text) +dictionaryFromFile dictName = (V.fromList . T.lines) <$> readFile dictName diff --git a/src/HFIG/Lovecraftian.hs b/src/HFIG/Lovecraftian.hs new file mode 100644 index 0000000..3ce90de --- /dev/null +++ b/src/HFIG/Lovecraftian.hs @@ -0,0 +1,78 @@ +{-| +module : HFIG.Lovecraftian +Description : Lovecraftian strategy to generate random human friendly ids +License : Public Domain +Maintainer : yann.esposito@gmail.com + +Should generate readable words easy to pronounce so minimizing mistake +when talking about them. + +Yet not the best for preventing collision. + +-} +module HFIG.Lovecraftian + ( idgen + , collisionProbability + ) +where + +import Protolude + +import qualified System.Random.MWC as Random +import qualified Control.Monad.Primitive as Prim +import qualified Data.Vector as V +import Data.Vector ((!)) +import qualified Data.Text as Text + + +-- | Will generate readable short names The integer parameter determine the +-- length in number of syllabus of the name +idgen :: Int -> IO Text +idgen n = Random.withSystemRandom $ \gen -> do + choice <- Random.uniformR (0 :: Int,1) gen + let np = if choice == 0 then drop 1 nameparts else nameparts + Text.intercalate "-" <$> replicateM n (genGod gen np) + +rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b +rnd gen v = do + (k :: Int) <- Random.uniformR (0,V.length v - 1) gen + return (v ! k) + +genGod :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text +genGod gen nps = mconcat <$> traverse (rnd gen) nps + +nameparts :: [V.Vector Text] +nameparts = [ + V.fromList ["a","e","i","u","o","a","ai","aiu","aiue","e","i","ia","iau","iu","o","u","y","ya","yi","yo"] + , V.fromList ["bh","br","c'th","cn","ct","cth","cx","d","d'","g","gh","ghr","gr","h","k","kh","kth","mh","mh'","ml","n","ng","sh","t","th","tr","v","v'","vh","vh'","vr","x","z","z'","zh"] + , V.fromList ["a","e","i","u","o","a","e","i","u","o","ao","aio","ui","aa","io","ou","y"] + , V.fromList ["bb","bh","br","cn","ct","dh","dhr","dr","drr","g","gd","gg","ggd","gh","gn","gnn","gr","jh","kl","l","ld","lk","ll","lp","lth","mbr","nd","p","r","rr","rv","th","thl","thr","thrh","tl","vh","x","xh","z","zh","zt"] + , V.fromList ["","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","'dhr","'dr","'end","'gn","'ith","'itr","'k","'kr","'l","'m","'r","'th","'vh","'x","'zh"] + , V.fromList ["a","e","i","u","o"] + , V.fromList ["","","","","","","","","","","d","g","h","l","lb","lbh","n","r","rc","rh","s","sh","ss","st","sz","th","tl","x","xr","xz"] + ] + +-- | Approximate collision probability other n generated name with complexity +-- parameter equal to l +-- +-- For example if you generate 1000 words randomly with complexity parameter 4 +-- We estimate the probability of collision to 3.85% +-- +-- This is a nice helper function to use when you want to estimate the optimal +-- length of your ids +-- +-- @ +-- > collisionProbability 1000 4 +-- 3.8580246913580245e-2 +-- +-- > collisionProbability 10000 5 +-- 6.430041152263374e-2 +-- +-- > collisionProbability 10000 6 +-- 1.0716735253772291e-3 +-- @ +collisionProbability :: Double -- ^ nb of generated names + -> Double -- ^ length parameter used + -> Double +collisionProbability n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1 + where size = product (fmap (V.length . V.filter (/= "")) nameparts) -- filtering the "" give a better approximation diff --git a/src/IDGen/Short.hs b/src/HFIG/Short.hs similarity index 98% rename from src/IDGen/Short.hs rename to src/HFIG/Short.hs index 5585d9d..794cdaa 100644 --- a/src/IDGen/Short.hs +++ b/src/HFIG/Short.hs @@ -1,5 +1,5 @@ {-| -module : IDGen.Short +module : HFIG.Short Description : Short strategy to generate random human friendly ids License : Public Domain Maintainer : yann.esposito@gmail.com @@ -10,7 +10,7 @@ when talking about them. Yet not the best for preventing collision. -} -module IDGen.Short +module HFIG.Short ( idgen , idgenVariableLength , collisionProbability diff --git a/src/Lib.hs b/src/Lib.hs deleted file mode 100644 index 22a3b3c..0000000 --- a/src/Lib.hs +++ /dev/null @@ -1,38 +0,0 @@ -{- | -Module : Lib -Description : Example of a library file. -Copyright : (c) 2018, Yann Esposito -License : ISC -Maintainer : yann.esposito@gmail.com -Stability : experimental -Portability : POSIX - -Example of library file which is also used for testing the test suites. -You can write a longer description of this module here and add @some markup@. - --} -module Lib - ( - -- * Exported functions - inc - ) where - -import Protolude - --- | Increment one 'Num' value. --- --- >>> let answer = 42 :: Int --- >>> let prev = answer - 1 --- >>> inc prev --- 42 --- >>> succ . Prelude.last . Prelude.take prev . iterate inc $ 1 --- 42 --- --- Properties: --- --- prop> succ x == inc x --- prop> inc (negate x) == negate (pred x) --- -inc :: Int -- ^ value to increment - -> Int -- ^ result -inc x = x + 1