From 75f8064aaf31f1bed038e2e7c687d04be20a3149 Mon Sep 17 00:00:00 2001 From: "Yann Esposito (Yogsototh)" Date: Tue, 4 Sep 2018 14:21:25 +0200 Subject: [PATCH] code cleanup, better random quality for short --- human-friendly-id-gen.cabal | 4 +- src-exe/Main.hs | 4 +- src/HFIG/Helpers.hs | 67 +++++++++++++++++++++++++++++++++ src/HFIG/Lovecraftian.hs | 30 +++------------ src/HFIG/Short.hs | 75 +++++++------------------------------ 5 files changed, 90 insertions(+), 90 deletions(-) create mode 100644 src/HFIG/Helpers.hs diff --git a/human-friendly-id-gen.cabal b/human-friendly-id-gen.cabal index 6f50a0b..89b50da 100644 --- a/human-friendly-id-gen.cabal +++ b/human-friendly-id-gen.cabal @@ -2,7 +2,7 @@ -- -- see: https://github.com/sol/hpack -- --- hash: 9c3e37ab9a4ab0be5cfdc76a03c0f8a4f3927508f29397d5d7cf5ebb45fa74b3 +-- hash: 0bb664d30c1e0572386dc9e95d45e1f5ec99b28b9c2e2722a81c52887647eb5b name: human-friendly-id-gen version: 0.1.0.0 @@ -29,9 +29,9 @@ data-files: library exposed-modules: HFIG.Dictionary + HFIG.Helpers HFIG.Lovecraftian HFIG.Short - Lib other-modules: Paths_human_friendly_id_gen hs-source-dirs: diff --git a/src-exe/Main.hs b/src-exe/Main.hs index 1baaeb4..58bca8d 100644 --- a/src-exe/Main.hs +++ b/src-exe/Main.hs @@ -12,8 +12,8 @@ main :: IO () main = do opts <- options "Generate Human Friendly identifiers" optParser case genMode opts of - Short -> Short.idgen 8 >>= putText - Lovecraftian -> Lovecraftian.idgen 2 >>= putText + Short -> Short.idgen (fromMaybe 4 (optLen opts)) >>= putText + Lovecraftian -> Lovecraftian.idgen (fromMaybe 2 (optLen opts)) >>= putText Dict -> do let file = case optDict opts of Just "english" -> "dictionaries/english.txt" diff --git a/src/HFIG/Helpers.hs b/src/HFIG/Helpers.hs new file mode 100644 index 0000000..b50afaa --- /dev/null +++ b/src/HFIG/Helpers.hs @@ -0,0 +1,67 @@ +{-| +module : HFIG.Helpers +Description : Helpers for most generation strategy +License : Public Domain +Maintainer : yann.esposito@gmail.com + +Helpers to generate different kind of identifiers + +-} +module HFIG.Helpers + ( idgen + , genWord + , collisionProbability + , NameParts + ) +where + +import Protolude + +import qualified System.Random.MWC as Random +import qualified Control.Monad.Primitive as Prim +import qualified Data.Vector as V +import Data.Vector ((!)) + +import qualified Data.Text as Text + +-- | Generic id generator from using nameparts +idgen :: Text -> NameParts -> Int -> IO Text +idgen i np n = Random.withSystemRandom $ \gen -> + Text.intercalate i <$> replicateM n (genWord gen np) + +rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b +rnd gen v = do + (k :: Int) <- Random.uniformR (0,V.length v - 1) gen + return (v ! k) + +genWord :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text +genWord gen nps = mconcat <$> traverse (rnd gen) nps + +type NameParts = [V.Vector Text] + +-- | Approximate collision probability other n generated name with complexity +-- parameter equal to l +-- +-- For example if you generate 1000 words randomly with complexity parameter 4 +-- We estimate the probability of collision to 3.85% +-- +-- This is a nice helper function to use when you want to estimate the optimal +-- length of your ids +-- +-- @ +-- > collisionProbability 1000 4 +-- 3.8580246913580245e-2 +-- +-- > collisionProbability 10000 5 +-- 6.430041152263374e-2 +-- +-- > collisionProbability 10000 6 +-- 1.0716735253772291e-3 +-- @ +collisionProbability :: + NameParts -- ^ name parts used + -> Double -- ^ nb of generated names + -> Double -- ^ length parameter used + -> Double +collisionProbability nameparts n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1 + where size = product (fmap (V.length . V.filter (/= "")) nameparts) diff --git a/src/HFIG/Lovecraftian.hs b/src/HFIG/Lovecraftian.hs index 3ce90de..3f5fc1b 100644 --- a/src/HFIG/Lovecraftian.hs +++ b/src/HFIG/Lovecraftian.hs @@ -19,11 +19,11 @@ where import Protolude import qualified System.Random.MWC as Random -import qualified Control.Monad.Primitive as Prim import qualified Data.Vector as V -import Data.Vector ((!)) import qualified Data.Text as Text +import qualified HFIG.Helpers as Helpers +import HFIG.Helpers (NameParts) -- | Will generate readable short names The integer parameter determine the -- length in number of syllabus of the name @@ -31,17 +31,9 @@ idgen :: Int -> IO Text idgen n = Random.withSystemRandom $ \gen -> do choice <- Random.uniformR (0 :: Int,1) gen let np = if choice == 0 then drop 1 nameparts else nameparts - Text.intercalate "-" <$> replicateM n (genGod gen np) + Text.intercalate "-" <$> replicateM n (Helpers.genWord gen np) -rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b -rnd gen v = do - (k :: Int) <- Random.uniformR (0,V.length v - 1) gen - return (v ! k) - -genGod :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text -genGod gen nps = mconcat <$> traverse (rnd gen) nps - -nameparts :: [V.Vector Text] +nameparts :: NameParts nameparts = [ V.fromList ["a","e","i","u","o","a","ai","aiu","aiue","e","i","ia","iau","iu","o","u","y","ya","yi","yo"] , V.fromList ["bh","br","c'th","cn","ct","cth","cx","d","d'","g","gh","ghr","gr","h","k","kh","kth","mh","mh'","ml","n","ng","sh","t","th","tr","v","v'","vh","vh'","vr","x","z","z'","zh"] @@ -60,19 +52,7 @@ nameparts = [ -- -- This is a nice helper function to use when you want to estimate the optimal -- length of your ids --- --- @ --- > collisionProbability 1000 4 --- 3.8580246913580245e-2 --- --- > collisionProbability 10000 5 --- 6.430041152263374e-2 --- --- > collisionProbability 10000 6 --- 1.0716735253772291e-3 --- @ collisionProbability :: Double -- ^ nb of generated names -> Double -- ^ length parameter used -> Double -collisionProbability n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1 - where size = product (fmap (V.length . V.filter (/= "")) nameparts) -- filtering the "" give a better approximation +collisionProbability = Helpers.collisionProbability nameparts diff --git a/src/HFIG/Short.hs b/src/HFIG/Short.hs index 794cdaa..59e8c1d 100644 --- a/src/HFIG/Short.hs +++ b/src/HFIG/Short.hs @@ -1,3 +1,4 @@ +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {-| module : HFIG.Short Description : Short strategy to generate random human friendly ids @@ -12,82 +13,34 @@ Yet not the best for preventing collision. -} module HFIG.Short ( idgen - , idgenVariableLength - , collisionProbability ) where import Protolude -import qualified System.Random.MWC as Random -import qualified System.Random.MWC.Distributions as Distr -import qualified Control.Monad.Primitive as Prim import qualified Data.Vector as V +import qualified HFIG.Helpers as Helpers -- | Will generate readable short names The integer parameter determine the -- length in number of syllabus of the name idgen :: Int -> IO Text -idgen n = Random.withSystemRandom $ \gen -> do - mconcat <$> replicateM n (genSylab gen) +idgen = Helpers.idgen "" nameparts --- | Approximate collision probability other n generated name with complexity --- parameter equal to l --- --- For example if you generate 1000 words randomly with complexity parameter 4 --- We estimate the probability of collision to 3.85% --- --- This is a nice helper function to use when you want to estimate the optimal --- length of your ids --- --- @ --- > collisionProbability 1000 4 --- 3.8580246913580245e-2 --- --- > collisionProbability 10000 5 --- 6.430041152263374e-2 --- --- > collisionProbability 10000 6 --- 1.0716735253772291e-3 --- @ -collisionProbability :: Double -- ^ nb of generated names - -> Double -- ^ length parameter used - -> Double -collisionProbability n l = min ((n**2) / (2 * (nbSylabs ** l))) 1 --- | Will generate readable short names --- The integer parameter determine the maximal length in number of sillabus of the name -idgenVariableLength :: Int -> IO Text -idgenVariableLength n = Random.withSystemRandom $ \gen -> do - val <- Distr.exponential nbSylabs gen - let len = n - (min (truncate $ val * fromIntegral n) (n - 1)) - mconcat <$> replicateM len (genSylab gen) - -nbSylabs :: Double -nbSylabs = fromIntegral $ V.length consonnants * V.length voyels - -genSylab :: Random.Gen Prim.RealWorld -> IO Text -genSylab gen = mappend <$> genConsonnant gen <*> genVoyel gen - -genConsonnant :: Random.Gen (Prim.PrimState IO) -> IO Text -genConsonnant gen = do - (k :: Int) <- Random.uniformR (0, V.length consonnants - 1) gen - return (consonnants V.! k) +nameparts :: Helpers.NameParts +nameparts = [ consonnants + , voyels + , V.fromList (replicate (2*(length consonnants)) "") <> consonnants] consonnants :: V.Vector Text -consonnants = V.fromList basics - where - basics = ["b" , "d", "f", "j", "k", "l", "m", "n", "p", "r", "s", "t", "v", "x", "z"] - -- you can also use basic <> composed but it will change the length of names in number of chars - -- and provide slightly harder to pronounce, read and understand names - -- for now I prefer to put that aside - -- composable = [ "b" , "d", "f", "k", "p", "s", "t", "v", "z"] - -- composed = mconcat (fmap (\x -> [ x <> c | c <- ["r", "l"]]) composable) - -genVoyel :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> m Text -genVoyel gen = do - (k :: Int) <- Random.uniformR (0, V.length voyels - 1) gen - return (voyels V.! k) +consonnants = V.fromList $ basics + where + basics = [ "b", "d", "f", "j", "k", "l", "m", "n" + , "p", "r", "s", "t", "v", "x", "z","w" + , "ch" + , "br", "dr", "pr", "tr" + , "bl", "pl" ] voyels :: V.Vector Text voyels = V.fromList ["a","i","o","u"]