Many bug fixes for new parser. Mainly dealing with spacing issues which cause botched parses.
This commit is contained in:
parent
42b364d623
commit
c6a5ef4a4d
5 changed files with 104 additions and 72 deletions
|
@ -27,10 +27,14 @@ sortOps = sortBy (\(i,_,_) (j,_,_) -> compare i j)
|
|||
|
||||
binops term anyOp = do
|
||||
e <- term
|
||||
(ops,es) <- liftM unzip $ many (do { op <- lexeme anyOp; e <- term; return (op,e) })
|
||||
(ops,es) <- liftM unzip $
|
||||
many (try $ do { whitespace
|
||||
; op <- anyOp
|
||||
; whitespace
|
||||
; e <- term; return (op,e) })
|
||||
case binopOf Map.empty (sortOps table) ops (e:es) of
|
||||
Right e -> return e
|
||||
Left msg -> mzero
|
||||
Left msg -> fail msg
|
||||
|
||||
binopSplit seen opTable i ops es =
|
||||
case (splitAt i ops, splitAt (i+1) es) of
|
||||
|
@ -40,8 +44,7 @@ binopSplit seen opTable i ops es =
|
|||
return $ Binop op e1 e2
|
||||
|
||||
binopOf _ _ _ [e] = return e
|
||||
binopOf _ [] ops es =
|
||||
return $ foldl' (flip ($)) (head es) $ zipWith Binop ops (tail es)
|
||||
binopOf seen [] ops es = binopOf seen [(9,L,head ops)] ops es
|
||||
|
||||
binopOf seen (tbl@((lvl, L, op):rest)) ops es =
|
||||
case elemIndices op ops of
|
||||
|
@ -54,7 +57,7 @@ binopOf seen (tbl@((lvl, assoc, op):rest)) ops es =
|
|||
case elemIndices op ops of
|
||||
[] -> binopOf seen rest ops es
|
||||
i:_ -> case Map.lookup lvl seen of
|
||||
Nothing -> binopSplit (Map.insert lvl (R,op) seen) tbl i ops es
|
||||
Nothing -> binopSplit (Map.insert lvl (assoc,op) seen) tbl i ops es
|
||||
Just (assoc',op') ->
|
||||
if assoc == assoc' && assoc /= N then
|
||||
binopSplit seen tbl i ops es
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
module ParseLib where
|
||||
|
||||
import Ast
|
||||
import Control.Applicative ((<$>))
|
||||
import Control.Monad
|
||||
import Data.Char (isSymbol)
|
||||
import Text.Parsec hiding (newline,spaces)
|
||||
|
@ -17,7 +18,7 @@ backslashed = do { char '\\'; c <- anyChar
|
|||
; return . read $ ['\'','\\',c,'\''] }
|
||||
|
||||
var :: (Monad m) => ParsecT [Char] u m String
|
||||
var = makeVar $ letter <|> char '_'
|
||||
var = makeVar (letter <|> char '_' <?> "variable")
|
||||
|
||||
lowVar :: (Monad m) => ParsecT [Char] u m String
|
||||
lowVar = makeVar (lower <?> "lower case variable")
|
||||
|
@ -26,7 +27,7 @@ capVar = makeVar (upper <?> "upper case variable")
|
|||
|
||||
makeVar p = do
|
||||
c <- p
|
||||
cs <- many $ alphaNum <|> char '_' <|> char '\''
|
||||
cs <- many (alphaNum <|> char '_' <|> char '\'' <?> "")
|
||||
guard $ c:cs `notElem` reserved
|
||||
return $ c:cs
|
||||
|
||||
|
@ -34,64 +35,86 @@ makeVar p = do
|
|||
anyOp :: (Monad m) => ParsecT [Char] u m String
|
||||
anyOp = betwixt '`' '`' var <|>
|
||||
(do op <- many1 (satisfy isSymbol <|> oneOf "+-/*=.$<>:&|^?%#@~!")
|
||||
guard (op `notElem` [ "=", "..", "->" ])
|
||||
guard (op `notElem` [ "=", "..", "->", "--" ])
|
||||
return op) <?> "infix operator"
|
||||
|
||||
lexeme p = do whitespace ; p
|
||||
|
||||
symbol :: (Monad m) => String -> ParsecT [Char] u m String
|
||||
symbol s = whitespace >> string s
|
||||
|
||||
arrow :: (Monad m) => ParsecT [Char] u m String
|
||||
arrow = symbol "->" <|> symbol "\8594"
|
||||
arrow = string "->" <|> string "\8594" <?> "arrow (->)"
|
||||
|
||||
commaSep p = p `sepBy` lexeme (char ',')
|
||||
commaSep :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
|
||||
commaSep p = do
|
||||
x <- optionMaybe p
|
||||
case x of
|
||||
Just a -> (a:) <$> many (try (whitespace >> char ',' >> whitespace >> p))
|
||||
Nothing -> return []
|
||||
|
||||
semiSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
|
||||
semiSep1 p = do a <- p
|
||||
(a:) <$> many (try (whitespace >> char ';' >> whitespace >> p))
|
||||
|
||||
pipeSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
|
||||
pipeSep1 p = do a <- p
|
||||
(a:) <$> many (try (whitespace >> char '|' >> whitespace >> p))
|
||||
|
||||
spaceSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
|
||||
spaceSep1 p = do a <- p
|
||||
(a:) <$> many (try (forcedWS >> p))
|
||||
|
||||
betwixt a b c = do char a ; out <- c
|
||||
char b <?> "closing '" ++ [b] ++ "'" ; return out
|
||||
betwixtSpcs a b c = do char a ; whitespace ; out <- c ; whitespace
|
||||
char b <?> "closing '" ++ [b] ++ "'" ; return out
|
||||
|
||||
listOf p = betwixtSpcs '[' ']' $ p `sepBy` lexeme (char ',')
|
||||
surround a z name p = do
|
||||
char a ; whitespace ; a <- p ; whitespace
|
||||
char z <?> unwords ["closing", name, show z]
|
||||
return a
|
||||
|
||||
braces :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
|
||||
braces = surround '[' ']' "brace"
|
||||
|
||||
parens :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
|
||||
parens = surround '(' ')' "paren"
|
||||
|
||||
brackets :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
|
||||
brackets = surround '{' '}' "bracket"
|
||||
|
||||
accessible expr = do
|
||||
e <- expr
|
||||
access <- optionMaybe (char '.')
|
||||
access <- optionMaybe . try $
|
||||
(do { char '.' ; notFollowedBy (char '.') } <?> "field access (e.g. List.map)")
|
||||
case access of
|
||||
Just '.' -> accessible (Access e `liftM` var)
|
||||
Just _ -> accessible (Access e `liftM` var <?> "field access (e.g. List.map)")
|
||||
Nothing -> return e
|
||||
|
||||
|
||||
spaces :: (Monad m) => ParsecT [Char] u m String
|
||||
spaces = many1 ((multiComment <|> string " ") >> return ' ') <?> "spaces"
|
||||
spaces = many1 ((multiComment <|> string " " <?> "") >> return ' ') <?> "spaces"
|
||||
|
||||
forcedWS :: (Monad m) => ParsecT [Char] u m [String]
|
||||
forcedWS = try (do { spaces; many nl_space }) <|> try (many1 nl_space)
|
||||
where nl_space = try $ many1 newline >> spaces
|
||||
|
||||
whitespace :: (Monad m) => ParsecT [Char] u m ()
|
||||
whitespace = optional forcedWS <?> "whitespace"
|
||||
whitespace = optional forcedWS <?> ""
|
||||
|
||||
freshLine :: (Monad m) => ParsecT [Char] u m [[String]]
|
||||
freshLine = try (do { many1 newline; many space_nl }) <|> try (many1 space_nl) <?> "fresh line"
|
||||
freshLine = try (do { many1 newline; many space_nl }) <|> try (many1 space_nl) <?> ""
|
||||
where space_nl = try $ spaces >> many1 newline
|
||||
|
||||
newline :: (Monad m) => ParsecT [Char] u m String
|
||||
newline = simpleNewline <|> lineComment <?> "newline"
|
||||
newline = simpleNewline <|> lineComment <?> ""
|
||||
|
||||
simpleNewline :: (Monad m) => ParsecT [Char] u m String
|
||||
simpleNewline = try (string "\r\n") <|> string "\n"
|
||||
|
||||
lineComment :: (Monad m) => ParsecT [Char] u m String
|
||||
lineComment = do try $ string "--"
|
||||
lineComment = do try (string "--")
|
||||
manyTill anyChar $ simpleNewline <|> (eof >> return "\n")
|
||||
|
||||
multiComment :: (Monad m) => ParsecT [Char] u m String
|
||||
multiComment = do { try $ string "{-"; closeComment }
|
||||
multiComment = do { try (string "{-"); closeComment }
|
||||
|
||||
closeComment :: (Monad m) => ParsecT [Char] u m String
|
||||
closeComment = manyTill anyChar . choice $
|
||||
[ try $ string "-}"
|
||||
[ try (string "-}") <?> "close comment"
|
||||
, do { try $ string "{-"; closeComment; closeComment }
|
||||
]
|
|
@ -9,7 +9,7 @@ import Data.List (lookup)
|
|||
import Text.Parsec
|
||||
|
||||
import ParseLib
|
||||
import Types
|
||||
import Types hiding (string,parens)
|
||||
import Guid
|
||||
|
||||
data ParseType = VarPT String
|
||||
|
@ -23,10 +23,10 @@ typeVar :: (Monad m) => ParsecT [Char] u m ParseType
|
|||
typeVar = VarPT <$> lowVar <?> "type variable"
|
||||
|
||||
typeList :: (Monad m) => ParsecT [Char] u m ParseType
|
||||
typeList = listPT <$> betwixtSpcs '[' ']' typeExpr
|
||||
typeList = listPT <$> braces typeExpr
|
||||
|
||||
typeTuple :: (Monad m) => ParsecT [Char] u m ParseType
|
||||
typeTuple = do ts <- betwixtSpcs '(' ')' (commaSep typeExpr)
|
||||
typeTuple = do ts <- parens (commaSep typeExpr)
|
||||
return $ case ts of { [t] -> t ; _ -> tuplePT ts }
|
||||
|
||||
typeUnambiguous :: (Monad m) => ParsecT [Char] u m ParseType
|
||||
|
@ -44,22 +44,23 @@ typeApp = do name <- capVar
|
|||
|
||||
typeExpr :: (Monad m) => ParsecT [Char] u m ParseType
|
||||
typeExpr = do
|
||||
whitespace
|
||||
t1 <- typeVar <|> typeApp <|> typeUnambiguous
|
||||
arrow <- optionMaybe arrow
|
||||
whitespace ; arrow <- optionMaybe arrow ; whitespace
|
||||
case arrow of Just _ -> LambdaPT t1 <$> typeExpr
|
||||
Nothing -> return t1
|
||||
|
||||
typeConstructor :: (Monad m) => ParsecT [Char] u m (String, [ParseType])
|
||||
typeConstructor = do name <- capVar
|
||||
args <- many (typeSimple <|> typeUnambiguous)
|
||||
typeConstructor = do
|
||||
name <- capVar
|
||||
args <- many (try (forcedWS >> (typeSimple <|> typeUnambiguous)))
|
||||
return $ (,) name args
|
||||
|
||||
datatype :: (Monad m) => ParsecT [Char] u m ([String], [Expr], GuidCounter [Type])
|
||||
datatype = do
|
||||
symbol "data" <?> "datatype definition"
|
||||
name <- capVar ; args <- many lowVar ; symbol "="
|
||||
tcs <- typeConstructor `sepBy1` symbol "|"
|
||||
string "data" <?> "datatype definition (data T = A | B | ...)"
|
||||
forcedWS ; name <- capVar ; args <- many (try (forcedWS >> lowVar))
|
||||
whitespace ; string "=" ; whitespace
|
||||
tcs <- pipeSep1 typeConstructor
|
||||
return $ (map fst tcs , map toFunc tcs , toTypes name args tcs)
|
||||
|
||||
beta = liftM VarT guid
|
||||
|
|
|
@ -37,27 +37,28 @@ chrTerm = Chr <$> betwixt '\'' '\'' (backslashed <|> satisfy (/='\'')) <?> "char
|
|||
|
||||
-------- Complex Terms --------
|
||||
|
||||
listTerm = expecting "list" . betwixtSpcs '[' ']' $ choice
|
||||
[ try $ do { lo <- expr; symbol ".." ; Range lo <$> expr }
|
||||
listTerm = braces $ choice
|
||||
[ try $ do { lo <- expr; whitespace; string ".." ; whitespace
|
||||
; Range lo <$> expr }
|
||||
, list <$> commaSep expr
|
||||
]
|
||||
|
||||
parensTerm = expecting "parenthesized expression" . betwixtSpcs '(' ')' $ choice
|
||||
parensTerm = parens $ choice
|
||||
[ do op <- anyOp
|
||||
return . Lambda "x" . Lambda "y" $ Binop op (Var "x") (Var "y")
|
||||
, do es <- commaSep expr
|
||||
return $ case es of { [e] -> e; _ -> tuple es }
|
||||
]
|
||||
|
||||
term = lexeme (choice [ numTerm, strTerm, chrTerm
|
||||
term = choice [ numTerm, strTerm, chrTerm
|
||||
, accessible varTerm
|
||||
, listTerm, parensTerm ])
|
||||
, listTerm, parensTerm ]
|
||||
<?> "basic term (number, variable, etc.)"
|
||||
|
||||
-------- Applications --------
|
||||
|
||||
appExpr = do
|
||||
tlist <- many1 (try term)
|
||||
tlist <- spaceSep1 term
|
||||
return $ case tlist of
|
||||
t:[] -> t
|
||||
t:ts -> foldl' App t ts
|
||||
|
@ -66,20 +67,20 @@ appExpr = do
|
|||
|
||||
binaryExpr = binops appExpr anyOp
|
||||
|
||||
ifExpr = expecting "if expression" $
|
||||
do symbol "if" ; e1 <- expr
|
||||
symbol "then" ; e2 <- expr
|
||||
expecting "else branch" $ do
|
||||
symbol "else" ; If e1 e2 <$> expr
|
||||
ifExpr = do string "if" ; forcedWS ; e1 <- expr ; forcedWS
|
||||
string "then" ; forcedWS ; e2 <- expr ; (forcedWS <?> "an 'else' branch")
|
||||
string "else" <?> "an 'else' branch" ; forcedWS ; If e1 e2 <$> expr
|
||||
|
||||
lambdaExpr = do lexeme $ oneOf "\\\x03BB"
|
||||
args <- patternTerm `endBy1` whitespace
|
||||
arrow
|
||||
lambdaExpr = do char '\\' <|> char '\x03BB' <?> "anonymous function"
|
||||
whitespace
|
||||
args <- spaceSep1 patternTerm
|
||||
whitespace ; arrow ; whitespace
|
||||
e <- expr
|
||||
return $ makeFunction args e
|
||||
|
||||
assignExpr = do
|
||||
patterns <- (patternTerm <?> "definition") `endBy1` whitespace; symbol "="; exp <- expr
|
||||
patterns <- spaceSep1 (patternTerm <?> "the definition of a variable (x = ...)")
|
||||
whitespace; string "="; whitespace; exp <- expr
|
||||
case patterns of
|
||||
PVar f : args -> return (f, makeFunction args exp)
|
||||
[PData x ps] -> if "Tuple" == take 5 x && all isDigit (drop 5 x) then
|
||||
|
@ -90,32 +91,36 @@ assignExpr = do
|
|||
"only named variables, named functions, and tuples are okay."
|
||||
|
||||
letExpr = do
|
||||
symbol "let"; brace <- optionMaybe . lexeme $ char '{'
|
||||
string "let"
|
||||
brace <- optionMaybe . try $ do
|
||||
whitespace
|
||||
char '{' <?> "a set of definitions { x = ... ; y = ... }"
|
||||
case brace of
|
||||
Nothing -> do f <- assignExpr; symbol "in"; e <- expr; return (Let [f] e)
|
||||
Just '{' -> do fs <- assignExpr `sepBy1` symbol ";"
|
||||
symbol "}" ; symbol "in"; e <- expr
|
||||
Nothing -> do forcedWS; f <- assignExpr; forcedWS; string "in"; forcedWS;
|
||||
Let [f] <$> expr
|
||||
Just '{' -> do whitespace ; fs <- semiSep1 assignExpr ; whitespace
|
||||
string "}" <?> "closing bracket '}'"
|
||||
whitespace; string "in"; forcedWS; e <- expr
|
||||
return (Let fs e)
|
||||
|
||||
caseExpr = do
|
||||
symbol "case"; e <- expr; symbol "of"
|
||||
betwixtSpcs '{' '}' $ Case e <$> case_ `sepBy1` symbol ";"
|
||||
where case_ = do p <- patternExpr; whitespace; arrow; e <- expr; return (p,e)
|
||||
string "case"; forcedWS; e <- expr; forcedWS; string "of"; whitespace
|
||||
Case e <$> brackets (semiSep1 (case_ <?> "cases { x -> ... }"))
|
||||
where case_ = do p <- patternExpr; whitespace; arrow; whitespace
|
||||
(,) p <$> expr
|
||||
|
||||
|
||||
expr = choice [ ifExpr, letExpr, caseExpr, lambdaExpr, binaryExpr ]
|
||||
expr = choice [ ifExpr, letExpr, caseExpr, lambdaExpr, binaryExpr ] <?> "expression"
|
||||
|
||||
def = do (f,e) <- assignExpr
|
||||
return ([f], [e], guid >>= \x -> return [VarT x])
|
||||
|
||||
manyDefs = try (do { d <- anyDef ; ds <- many freshDef ; return (d:ds) })
|
||||
<|> try (many1 freshDef)
|
||||
where freshDef = try $ many1 freshLine >> anyDef
|
||||
anyDef = def <|> datatype
|
||||
defs1 = do optional freshLine
|
||||
d <- datatype <|> def
|
||||
(d:) <$> many (try (freshLine >> (datatype <|> def)))
|
||||
|
||||
defs = do
|
||||
ds <- manyDefs
|
||||
let (fss,ess,tss) = unzip3 ds
|
||||
(fss,ess,tss) <- unzip3 <$> defs1
|
||||
let (fs,es,ts) = (concat fss, concat ess, concat `liftM` sequence tss)
|
||||
optional freshLine ; optional whitespace ; eof
|
||||
return (Let (zip fs es) (Var "main"), liftM (zip fs) ts)
|
||||
|
|
|
@ -21,19 +21,19 @@ patternBasic =
|
|||
]
|
||||
|
||||
patternTuple :: Monad m => ParsecT [Char] u m Pattern
|
||||
patternTuple = do ps <- betwixtSpcs '(' ')' $ patternExpr `sepBy` lexeme (char ',')
|
||||
patternTuple = do ps <- parens (commaSep patternExpr)
|
||||
return $ case ps of { [p] -> p; _ -> ptuple ps }
|
||||
|
||||
patternCons :: Monad m => ParsecT [Char] u m Pattern
|
||||
patternCons = do
|
||||
p <- patternTerm
|
||||
colon <- optionMaybe (char ':')
|
||||
colon <- optionMaybe (char ':' <?> "more complicated pattern")
|
||||
case colon of
|
||||
Just ':' -> pcons p <$> patternExpr
|
||||
Nothing -> return p
|
||||
|
||||
patternList :: Monad m => ParsecT [Char] u m Pattern
|
||||
patternList = plist <$> listOf patternExpr
|
||||
patternList = plist <$> braces (commaSep patternExpr)
|
||||
|
||||
patternTerm :: Monad m => ParsecT [Char] u m Pattern
|
||||
patternTerm = patternTuple <|> patternList <|> patternBasic <?> "pattern"
|
||||
|
|
Loading…
Reference in a new issue