code cleanup, better random quality for short
This commit is contained in:
parent
d2a03a9125
commit
75f8064aaf
5 changed files with 90 additions and 90 deletions
|
@ -2,7 +2,7 @@
|
||||||
--
|
--
|
||||||
-- see: https://github.com/sol/hpack
|
-- see: https://github.com/sol/hpack
|
||||||
--
|
--
|
||||||
-- hash: 9c3e37ab9a4ab0be5cfdc76a03c0f8a4f3927508f29397d5d7cf5ebb45fa74b3
|
-- hash: 0bb664d30c1e0572386dc9e95d45e1f5ec99b28b9c2e2722a81c52887647eb5b
|
||||||
|
|
||||||
name: human-friendly-id-gen
|
name: human-friendly-id-gen
|
||||||
version: 0.1.0.0
|
version: 0.1.0.0
|
||||||
|
@ -29,9 +29,9 @@ data-files:
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
HFIG.Dictionary
|
HFIG.Dictionary
|
||||||
|
HFIG.Helpers
|
||||||
HFIG.Lovecraftian
|
HFIG.Lovecraftian
|
||||||
HFIG.Short
|
HFIG.Short
|
||||||
Lib
|
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_human_friendly_id_gen
|
Paths_human_friendly_id_gen
|
||||||
hs-source-dirs:
|
hs-source-dirs:
|
||||||
|
|
|
@ -12,8 +12,8 @@ main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
opts <- options "Generate Human Friendly identifiers" optParser
|
opts <- options "Generate Human Friendly identifiers" optParser
|
||||||
case genMode opts of
|
case genMode opts of
|
||||||
Short -> Short.idgen 8 >>= putText
|
Short -> Short.idgen (fromMaybe 4 (optLen opts)) >>= putText
|
||||||
Lovecraftian -> Lovecraftian.idgen 2 >>= putText
|
Lovecraftian -> Lovecraftian.idgen (fromMaybe 2 (optLen opts)) >>= putText
|
||||||
Dict -> do
|
Dict -> do
|
||||||
let file = case optDict opts of
|
let file = case optDict opts of
|
||||||
Just "english" -> "dictionaries/english.txt"
|
Just "english" -> "dictionaries/english.txt"
|
||||||
|
|
67
src/HFIG/Helpers.hs
Normal file
67
src/HFIG/Helpers.hs
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
{-|
|
||||||
|
module : HFIG.Helpers
|
||||||
|
Description : Helpers for most generation strategy
|
||||||
|
License : Public Domain
|
||||||
|
Maintainer : yann.esposito@gmail.com
|
||||||
|
|
||||||
|
Helpers to generate different kind of identifiers
|
||||||
|
|
||||||
|
-}
|
||||||
|
module HFIG.Helpers
|
||||||
|
( idgen
|
||||||
|
, genWord
|
||||||
|
, collisionProbability
|
||||||
|
, NameParts
|
||||||
|
)
|
||||||
|
where
|
||||||
|
|
||||||
|
import Protolude
|
||||||
|
|
||||||
|
import qualified System.Random.MWC as Random
|
||||||
|
import qualified Control.Monad.Primitive as Prim
|
||||||
|
import qualified Data.Vector as V
|
||||||
|
import Data.Vector ((!))
|
||||||
|
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
|
-- | Generic id generator from using nameparts
|
||||||
|
idgen :: Text -> NameParts -> Int -> IO Text
|
||||||
|
idgen i np n = Random.withSystemRandom $ \gen ->
|
||||||
|
Text.intercalate i <$> replicateM n (genWord gen np)
|
||||||
|
|
||||||
|
rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b
|
||||||
|
rnd gen v = do
|
||||||
|
(k :: Int) <- Random.uniformR (0,V.length v - 1) gen
|
||||||
|
return (v ! k)
|
||||||
|
|
||||||
|
genWord :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text
|
||||||
|
genWord gen nps = mconcat <$> traverse (rnd gen) nps
|
||||||
|
|
||||||
|
type NameParts = [V.Vector Text]
|
||||||
|
|
||||||
|
-- | Approximate collision probability other n generated name with complexity
|
||||||
|
-- parameter equal to l
|
||||||
|
--
|
||||||
|
-- For example if you generate 1000 words randomly with complexity parameter 4
|
||||||
|
-- We estimate the probability of collision to 3.85%
|
||||||
|
--
|
||||||
|
-- This is a nice helper function to use when you want to estimate the optimal
|
||||||
|
-- length of your ids
|
||||||
|
--
|
||||||
|
-- @
|
||||||
|
-- > collisionProbability 1000 4
|
||||||
|
-- 3.8580246913580245e-2
|
||||||
|
--
|
||||||
|
-- > collisionProbability 10000 5
|
||||||
|
-- 6.430041152263374e-2
|
||||||
|
--
|
||||||
|
-- > collisionProbability 10000 6
|
||||||
|
-- 1.0716735253772291e-3
|
||||||
|
-- @
|
||||||
|
collisionProbability ::
|
||||||
|
NameParts -- ^ name parts used
|
||||||
|
-> Double -- ^ nb of generated names
|
||||||
|
-> Double -- ^ length parameter used
|
||||||
|
-> Double
|
||||||
|
collisionProbability nameparts n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1
|
||||||
|
where size = product (fmap (V.length . V.filter (/= "")) nameparts)
|
|
@ -19,11 +19,11 @@ where
|
||||||
import Protolude
|
import Protolude
|
||||||
|
|
||||||
import qualified System.Random.MWC as Random
|
import qualified System.Random.MWC as Random
|
||||||
import qualified Control.Monad.Primitive as Prim
|
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import Data.Vector ((!))
|
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
|
import qualified HFIG.Helpers as Helpers
|
||||||
|
import HFIG.Helpers (NameParts)
|
||||||
|
|
||||||
-- | Will generate readable short names The integer parameter determine the
|
-- | Will generate readable short names The integer parameter determine the
|
||||||
-- length in number of syllabus of the name
|
-- length in number of syllabus of the name
|
||||||
|
@ -31,17 +31,9 @@ idgen :: Int -> IO Text
|
||||||
idgen n = Random.withSystemRandom $ \gen -> do
|
idgen n = Random.withSystemRandom $ \gen -> do
|
||||||
choice <- Random.uniformR (0 :: Int,1) gen
|
choice <- Random.uniformR (0 :: Int,1) gen
|
||||||
let np = if choice == 0 then drop 1 nameparts else nameparts
|
let np = if choice == 0 then drop 1 nameparts else nameparts
|
||||||
Text.intercalate "-" <$> replicateM n (genGod gen np)
|
Text.intercalate "-" <$> replicateM n (Helpers.genWord gen np)
|
||||||
|
|
||||||
rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b
|
nameparts :: NameParts
|
||||||
rnd gen v = do
|
|
||||||
(k :: Int) <- Random.uniformR (0,V.length v - 1) gen
|
|
||||||
return (v ! k)
|
|
||||||
|
|
||||||
genGod :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text
|
|
||||||
genGod gen nps = mconcat <$> traverse (rnd gen) nps
|
|
||||||
|
|
||||||
nameparts :: [V.Vector Text]
|
|
||||||
nameparts = [
|
nameparts = [
|
||||||
V.fromList ["a","e","i","u","o","a","ai","aiu","aiue","e","i","ia","iau","iu","o","u","y","ya","yi","yo"]
|
V.fromList ["a","e","i","u","o","a","ai","aiu","aiue","e","i","ia","iau","iu","o","u","y","ya","yi","yo"]
|
||||||
, V.fromList ["bh","br","c'th","cn","ct","cth","cx","d","d'","g","gh","ghr","gr","h","k","kh","kth","mh","mh'","ml","n","ng","sh","t","th","tr","v","v'","vh","vh'","vr","x","z","z'","zh"]
|
, V.fromList ["bh","br","c'th","cn","ct","cth","cx","d","d'","g","gh","ghr","gr","h","k","kh","kth","mh","mh'","ml","n","ng","sh","t","th","tr","v","v'","vh","vh'","vr","x","z","z'","zh"]
|
||||||
|
@ -60,19 +52,7 @@ nameparts = [
|
||||||
--
|
--
|
||||||
-- This is a nice helper function to use when you want to estimate the optimal
|
-- This is a nice helper function to use when you want to estimate the optimal
|
||||||
-- length of your ids
|
-- length of your ids
|
||||||
--
|
|
||||||
-- @
|
|
||||||
-- > collisionProbability 1000 4
|
|
||||||
-- 3.8580246913580245e-2
|
|
||||||
--
|
|
||||||
-- > collisionProbability 10000 5
|
|
||||||
-- 6.430041152263374e-2
|
|
||||||
--
|
|
||||||
-- > collisionProbability 10000 6
|
|
||||||
-- 1.0716735253772291e-3
|
|
||||||
-- @
|
|
||||||
collisionProbability :: Double -- ^ nb of generated names
|
collisionProbability :: Double -- ^ nb of generated names
|
||||||
-> Double -- ^ length parameter used
|
-> Double -- ^ length parameter used
|
||||||
-> Double
|
-> Double
|
||||||
collisionProbability n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1
|
collisionProbability = Helpers.collisionProbability nameparts
|
||||||
where size = product (fmap (V.length . V.filter (/= "")) nameparts) -- filtering the "" give a better approximation
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
{-# OPTIONS_GHC -fno-warn-type-defaults #-}
|
||||||
{-|
|
{-|
|
||||||
module : HFIG.Short
|
module : HFIG.Short
|
||||||
Description : Short strategy to generate random human friendly ids
|
Description : Short strategy to generate random human friendly ids
|
||||||
|
@ -12,82 +13,34 @@ Yet not the best for preventing collision.
|
||||||
-}
|
-}
|
||||||
module HFIG.Short
|
module HFIG.Short
|
||||||
( idgen
|
( idgen
|
||||||
, idgenVariableLength
|
|
||||||
, collisionProbability
|
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Protolude
|
import Protolude
|
||||||
|
|
||||||
import qualified System.Random.MWC as Random
|
|
||||||
import qualified System.Random.MWC.Distributions as Distr
|
|
||||||
import qualified Control.Monad.Primitive as Prim
|
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
|
|
||||||
|
import qualified HFIG.Helpers as Helpers
|
||||||
|
|
||||||
-- | Will generate readable short names The integer parameter determine the
|
-- | Will generate readable short names The integer parameter determine the
|
||||||
-- length in number of syllabus of the name
|
-- length in number of syllabus of the name
|
||||||
idgen :: Int -> IO Text
|
idgen :: Int -> IO Text
|
||||||
idgen n = Random.withSystemRandom $ \gen -> do
|
idgen = Helpers.idgen "" nameparts
|
||||||
mconcat <$> replicateM n (genSylab gen)
|
|
||||||
|
|
||||||
-- | Approximate collision probability other n generated name with complexity
|
|
||||||
-- parameter equal to l
|
|
||||||
--
|
|
||||||
-- For example if you generate 1000 words randomly with complexity parameter 4
|
|
||||||
-- We estimate the probability of collision to 3.85%
|
|
||||||
--
|
|
||||||
-- This is a nice helper function to use when you want to estimate the optimal
|
|
||||||
-- length of your ids
|
|
||||||
--
|
|
||||||
-- @
|
|
||||||
-- > collisionProbability 1000 4
|
|
||||||
-- 3.8580246913580245e-2
|
|
||||||
--
|
|
||||||
-- > collisionProbability 10000 5
|
|
||||||
-- 6.430041152263374e-2
|
|
||||||
--
|
|
||||||
-- > collisionProbability 10000 6
|
|
||||||
-- 1.0716735253772291e-3
|
|
||||||
-- @
|
|
||||||
collisionProbability :: Double -- ^ nb of generated names
|
|
||||||
-> Double -- ^ length parameter used
|
|
||||||
-> Double
|
|
||||||
collisionProbability n l = min ((n**2) / (2 * (nbSylabs ** l))) 1
|
|
||||||
|
|
||||||
-- | Will generate readable short names
|
nameparts :: Helpers.NameParts
|
||||||
-- The integer parameter determine the maximal length in number of sillabus of the name
|
nameparts = [ consonnants
|
||||||
idgenVariableLength :: Int -> IO Text
|
, voyels
|
||||||
idgenVariableLength n = Random.withSystemRandom $ \gen -> do
|
, V.fromList (replicate (2*(length consonnants)) "") <> consonnants]
|
||||||
val <- Distr.exponential nbSylabs gen
|
|
||||||
let len = n - (min (truncate $ val * fromIntegral n) (n - 1))
|
|
||||||
mconcat <$> replicateM len (genSylab gen)
|
|
||||||
|
|
||||||
nbSylabs :: Double
|
|
||||||
nbSylabs = fromIntegral $ V.length consonnants * V.length voyels
|
|
||||||
|
|
||||||
genSylab :: Random.Gen Prim.RealWorld -> IO Text
|
|
||||||
genSylab gen = mappend <$> genConsonnant gen <*> genVoyel gen
|
|
||||||
|
|
||||||
genConsonnant :: Random.Gen (Prim.PrimState IO) -> IO Text
|
|
||||||
genConsonnant gen = do
|
|
||||||
(k :: Int) <- Random.uniformR (0, V.length consonnants - 1) gen
|
|
||||||
return (consonnants V.! k)
|
|
||||||
|
|
||||||
consonnants :: V.Vector Text
|
consonnants :: V.Vector Text
|
||||||
consonnants = V.fromList basics
|
consonnants = V.fromList $ basics
|
||||||
where
|
where
|
||||||
basics = ["b" , "d", "f", "j", "k", "l", "m", "n", "p", "r", "s", "t", "v", "x", "z"]
|
basics = [ "b", "d", "f", "j", "k", "l", "m", "n"
|
||||||
-- you can also use basic <> composed but it will change the length of names in number of chars
|
, "p", "r", "s", "t", "v", "x", "z","w"
|
||||||
-- and provide slightly harder to pronounce, read and understand names
|
, "ch"
|
||||||
-- for now I prefer to put that aside
|
, "br", "dr", "pr", "tr"
|
||||||
-- composable = [ "b" , "d", "f", "k", "p", "s", "t", "v", "z"]
|
, "bl", "pl" ]
|
||||||
-- composed = mconcat (fmap (\x -> [ x <> c | c <- ["r", "l"]]) composable)
|
|
||||||
|
|
||||||
genVoyel :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> m Text
|
|
||||||
genVoyel gen = do
|
|
||||||
(k :: Int) <- Random.uniformR (0, V.length voyels - 1) gen
|
|
||||||
return (voyels V.! k)
|
|
||||||
|
|
||||||
voyels :: V.Vector Text
|
voyels :: V.Vector Text
|
||||||
voyels = V.fromList ["a","i","o","u"]
|
voyels = V.fromList ["a","i","o","u"]
|
||||||
|
|
Loading…
Reference in a new issue