Many bug fixes for new parser. Mainly dealing with spacing issues which cause botched parses.

This commit is contained in:
evancz 2012-06-09 11:47:56 -05:00
parent 42b364d623
commit c6a5ef4a4d
5 changed files with 104 additions and 72 deletions

View file

@ -27,10 +27,14 @@ sortOps = sortBy (\(i,_,_) (j,_,_) -> compare i j)
binops term anyOp = do
e <- term
(ops,es) <- liftM unzip $ many (do { op <- lexeme anyOp; e <- term; return (op,e) })
(ops,es) <- liftM unzip $
many (try $ do { whitespace
; op <- anyOp
; whitespace
; e <- term; return (op,e) })
case binopOf Map.empty (sortOps table) ops (e:es) of
Right e -> return e
Left msg -> mzero
Left msg -> fail msg
binopSplit seen opTable i ops es =
case (splitAt i ops, splitAt (i+1) es) of
@ -40,8 +44,7 @@ binopSplit seen opTable i ops es =
return $ Binop op e1 e2
binopOf _ _ _ [e] = return e
binopOf _ [] ops es =
return $ foldl' (flip ($)) (head es) $ zipWith Binop ops (tail es)
binopOf seen [] ops es = binopOf seen [(9,L,head ops)] ops es
binopOf seen (tbl@((lvl, L, op):rest)) ops es =
case elemIndices op ops of
@ -54,7 +57,7 @@ binopOf seen (tbl@((lvl, assoc, op):rest)) ops es =
case elemIndices op ops of
[] -> binopOf seen rest ops es
i:_ -> case Map.lookup lvl seen of
Nothing -> binopSplit (Map.insert lvl (R,op) seen) tbl i ops es
Nothing -> binopSplit (Map.insert lvl (assoc,op) seen) tbl i ops es
Just (assoc',op') ->
if assoc == assoc' && assoc /= N then
binopSplit seen tbl i ops es

View file

@ -2,6 +2,7 @@
module ParseLib where
import Ast
import Control.Applicative ((<$>))
import Control.Monad
import Data.Char (isSymbol)
import Text.Parsec hiding (newline,spaces)
@ -17,7 +18,7 @@ backslashed = do { char '\\'; c <- anyChar
; return . read $ ['\'','\\',c,'\''] }
var :: (Monad m) => ParsecT [Char] u m String
var = makeVar $ letter <|> char '_'
var = makeVar (letter <|> char '_' <?> "variable")
lowVar :: (Monad m) => ParsecT [Char] u m String
lowVar = makeVar (lower <?> "lower case variable")
@ -26,7 +27,7 @@ capVar = makeVar (upper <?> "upper case variable")
makeVar p = do
c <- p
cs <- many $ alphaNum <|> char '_' <|> char '\''
cs <- many (alphaNum <|> char '_' <|> char '\'' <?> "")
guard $ c:cs `notElem` reserved
return $ c:cs
@ -34,64 +35,86 @@ makeVar p = do
anyOp :: (Monad m) => ParsecT [Char] u m String
anyOp = betwixt '`' '`' var <|>
(do op <- many1 (satisfy isSymbol <|> oneOf "+-/*=.$<>:&|^?%#@~!")
guard (op `notElem` [ "=", "..", "->" ])
guard (op `notElem` [ "=", "..", "->", "--" ])
return op) <?> "infix operator"
lexeme p = do whitespace ; p
symbol :: (Monad m) => String -> ParsecT [Char] u m String
symbol s = whitespace >> string s
arrow :: (Monad m) => ParsecT [Char] u m String
arrow = symbol "->" <|> symbol "\8594"
arrow = string "->" <|> string "\8594" <?> "arrow (->)"
commaSep p = p `sepBy` lexeme (char ',')
commaSep :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
commaSep p = do
x <- optionMaybe p
case x of
Just a -> (a:) <$> many (try (whitespace >> char ',' >> whitespace >> p))
Nothing -> return []
semiSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
semiSep1 p = do a <- p
(a:) <$> many (try (whitespace >> char ';' >> whitespace >> p))
pipeSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
pipeSep1 p = do a <- p
(a:) <$> many (try (whitespace >> char '|' >> whitespace >> p))
spaceSep1 :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m [a]
spaceSep1 p = do a <- p
(a:) <$> many (try (forcedWS >> p))
betwixt a b c = do char a ; out <- c
char b <?> "closing '" ++ [b] ++ "'" ; return out
betwixtSpcs a b c = do char a ; whitespace ; out <- c ; whitespace
char b <?> "closing '" ++ [b] ++ "'" ; return out
listOf p = betwixtSpcs '[' ']' $ p `sepBy` lexeme (char ',')
surround a z name p = do
char a ; whitespace ; a <- p ; whitespace
char z <?> unwords ["closing", name, show z]
return a
braces :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
braces = surround '[' ']' "brace"
parens :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
parens = surround '(' ')' "paren"
brackets :: (Monad m) => ParsecT [Char] u m a -> ParsecT [Char] u m a
brackets = surround '{' '}' "bracket"
accessible expr = do
e <- expr
access <- optionMaybe (char '.')
access <- optionMaybe . try $
(do { char '.' ; notFollowedBy (char '.') } <?> "field access (e.g. List.map)")
case access of
Just '.' -> accessible (Access e `liftM` var)
Just _ -> accessible (Access e `liftM` var <?> "field access (e.g. List.map)")
Nothing -> return e
spaces :: (Monad m) => ParsecT [Char] u m String
spaces = many1 ((multiComment <|> string " ") >> return ' ') <?> "spaces"
spaces = many1 ((multiComment <|> string " " <?> "") >> return ' ') <?> "spaces"
forcedWS :: (Monad m) => ParsecT [Char] u m [String]
forcedWS = try (do { spaces; many nl_space }) <|> try (many1 nl_space)
where nl_space = try $ many1 newline >> spaces
whitespace :: (Monad m) => ParsecT [Char] u m ()
whitespace = optional forcedWS <?> "whitespace"
whitespace = optional forcedWS <?> ""
freshLine :: (Monad m) => ParsecT [Char] u m [[String]]
freshLine = try (do { many1 newline; many space_nl }) <|> try (many1 space_nl) <?> "fresh line"
freshLine = try (do { many1 newline; many space_nl }) <|> try (many1 space_nl) <?> ""
where space_nl = try $ spaces >> many1 newline
newline :: (Monad m) => ParsecT [Char] u m String
newline = simpleNewline <|> lineComment <?> "newline"
newline = simpleNewline <|> lineComment <?> ""
simpleNewline :: (Monad m) => ParsecT [Char] u m String
simpleNewline = try (string "\r\n") <|> string "\n"
lineComment :: (Monad m) => ParsecT [Char] u m String
lineComment = do try $ string "--"
lineComment = do try (string "--")
manyTill anyChar $ simpleNewline <|> (eof >> return "\n")
multiComment :: (Monad m) => ParsecT [Char] u m String
multiComment = do { try $ string "{-"; closeComment }
multiComment = do { try (string "{-"); closeComment }
closeComment :: (Monad m) => ParsecT [Char] u m String
closeComment = manyTill anyChar . choice $
[ try $ string "-}"
[ try (string "-}") <?> "close comment"
, do { try $ string "{-"; closeComment; closeComment }
]

View file

@ -9,7 +9,7 @@ import Data.List (lookup)
import Text.Parsec
import ParseLib
import Types
import Types hiding (string,parens)
import Guid
data ParseType = VarPT String
@ -23,10 +23,10 @@ typeVar :: (Monad m) => ParsecT [Char] u m ParseType
typeVar = VarPT <$> lowVar <?> "type variable"
typeList :: (Monad m) => ParsecT [Char] u m ParseType
typeList = listPT <$> betwixtSpcs '[' ']' typeExpr
typeList = listPT <$> braces typeExpr
typeTuple :: (Monad m) => ParsecT [Char] u m ParseType
typeTuple = do ts <- betwixtSpcs '(' ')' (commaSep typeExpr)
typeTuple = do ts <- parens (commaSep typeExpr)
return $ case ts of { [t] -> t ; _ -> tuplePT ts }
typeUnambiguous :: (Monad m) => ParsecT [Char] u m ParseType
@ -44,22 +44,23 @@ typeApp = do name <- capVar
typeExpr :: (Monad m) => ParsecT [Char] u m ParseType
typeExpr = do
whitespace
t1 <- typeVar <|> typeApp <|> typeUnambiguous
arrow <- optionMaybe arrow
whitespace ; arrow <- optionMaybe arrow ; whitespace
case arrow of Just _ -> LambdaPT t1 <$> typeExpr
Nothing -> return t1
typeConstructor :: (Monad m) => ParsecT [Char] u m (String, [ParseType])
typeConstructor = do name <- capVar
args <- many (typeSimple <|> typeUnambiguous)
typeConstructor = do
name <- capVar
args <- many (try (forcedWS >> (typeSimple <|> typeUnambiguous)))
return $ (,) name args
datatype :: (Monad m) => ParsecT [Char] u m ([String], [Expr], GuidCounter [Type])
datatype = do
symbol "data" <?> "datatype definition"
name <- capVar ; args <- many lowVar ; symbol "="
tcs <- typeConstructor `sepBy1` symbol "|"
string "data" <?> "datatype definition (data T = A | B | ...)"
forcedWS ; name <- capVar ; args <- many (try (forcedWS >> lowVar))
whitespace ; string "=" ; whitespace
tcs <- pipeSep1 typeConstructor
return $ (map fst tcs , map toFunc tcs , toTypes name args tcs)
beta = liftM VarT guid

View file

@ -37,27 +37,28 @@ chrTerm = Chr <$> betwixt '\'' '\'' (backslashed <|> satisfy (/='\'')) <?> "char
-------- Complex Terms --------
listTerm = expecting "list" . betwixtSpcs '[' ']' $ choice
[ try $ do { lo <- expr; symbol ".." ; Range lo <$> expr }
listTerm = braces $ choice
[ try $ do { lo <- expr; whitespace; string ".." ; whitespace
; Range lo <$> expr }
, list <$> commaSep expr
]
parensTerm = expecting "parenthesized expression" . betwixtSpcs '(' ')' $ choice
parensTerm = parens $ choice
[ do op <- anyOp
return . Lambda "x" . Lambda "y" $ Binop op (Var "x") (Var "y")
, do es <- commaSep expr
return $ case es of { [e] -> e; _ -> tuple es }
]
term = lexeme (choice [ numTerm, strTerm, chrTerm
term = choice [ numTerm, strTerm, chrTerm
, accessible varTerm
, listTerm, parensTerm ])
, listTerm, parensTerm ]
<?> "basic term (number, variable, etc.)"
-------- Applications --------
appExpr = do
tlist <- many1 (try term)
tlist <- spaceSep1 term
return $ case tlist of
t:[] -> t
t:ts -> foldl' App t ts
@ -66,20 +67,20 @@ appExpr = do
binaryExpr = binops appExpr anyOp
ifExpr = expecting "if expression" $
do symbol "if" ; e1 <- expr
symbol "then" ; e2 <- expr
expecting "else branch" $ do
symbol "else" ; If e1 e2 <$> expr
ifExpr = do string "if" ; forcedWS ; e1 <- expr ; forcedWS
string "then" ; forcedWS ; e2 <- expr ; (forcedWS <?> "an 'else' branch")
string "else" <?> "an 'else' branch" ; forcedWS ; If e1 e2 <$> expr
lambdaExpr = do lexeme $ oneOf "\\\x03BB"
args <- patternTerm `endBy1` whitespace
arrow
lambdaExpr = do char '\\' <|> char '\x03BB' <?> "anonymous function"
whitespace
args <- spaceSep1 patternTerm
whitespace ; arrow ; whitespace
e <- expr
return $ makeFunction args e
assignExpr = do
patterns <- (patternTerm <?> "definition") `endBy1` whitespace; symbol "="; exp <- expr
patterns <- spaceSep1 (patternTerm <?> "the definition of a variable (x = ...)")
whitespace; string "="; whitespace; exp <- expr
case patterns of
PVar f : args -> return (f, makeFunction args exp)
[PData x ps] -> if "Tuple" == take 5 x && all isDigit (drop 5 x) then
@ -90,32 +91,36 @@ assignExpr = do
"only named variables, named functions, and tuples are okay."
letExpr = do
symbol "let"; brace <- optionMaybe . lexeme $ char '{'
string "let"
brace <- optionMaybe . try $ do
whitespace
char '{' <?> "a set of definitions { x = ... ; y = ... }"
case brace of
Nothing -> do f <- assignExpr; symbol "in"; e <- expr; return (Let [f] e)
Just '{' -> do fs <- assignExpr `sepBy1` symbol ";"
symbol "}" ; symbol "in"; e <- expr
Nothing -> do forcedWS; f <- assignExpr; forcedWS; string "in"; forcedWS;
Let [f] <$> expr
Just '{' -> do whitespace ; fs <- semiSep1 assignExpr ; whitespace
string "}" <?> "closing bracket '}'"
whitespace; string "in"; forcedWS; e <- expr
return (Let fs e)
caseExpr = do
symbol "case"; e <- expr; symbol "of"
betwixtSpcs '{' '}' $ Case e <$> case_ `sepBy1` symbol ";"
where case_ = do p <- patternExpr; whitespace; arrow; e <- expr; return (p,e)
string "case"; forcedWS; e <- expr; forcedWS; string "of"; whitespace
Case e <$> brackets (semiSep1 (case_ <?> "cases { x -> ... }"))
where case_ = do p <- patternExpr; whitespace; arrow; whitespace
(,) p <$> expr
expr = choice [ ifExpr, letExpr, caseExpr, lambdaExpr, binaryExpr ]
expr = choice [ ifExpr, letExpr, caseExpr, lambdaExpr, binaryExpr ] <?> "expression"
def = do (f,e) <- assignExpr
return ([f], [e], guid >>= \x -> return [VarT x])
manyDefs = try (do { d <- anyDef ; ds <- many freshDef ; return (d:ds) })
<|> try (many1 freshDef)
where freshDef = try $ many1 freshLine >> anyDef
anyDef = def <|> datatype
defs1 = do optional freshLine
d <- datatype <|> def
(d:) <$> many (try (freshLine >> (datatype <|> def)))
defs = do
ds <- manyDefs
let (fss,ess,tss) = unzip3 ds
(fss,ess,tss) <- unzip3 <$> defs1
let (fs,es,ts) = (concat fss, concat ess, concat `liftM` sequence tss)
optional freshLine ; optional whitespace ; eof
return (Let (zip fs es) (Var "main"), liftM (zip fs) ts)

View file

@ -21,19 +21,19 @@ patternBasic =
]
patternTuple :: Monad m => ParsecT [Char] u m Pattern
patternTuple = do ps <- betwixtSpcs '(' ')' $ patternExpr `sepBy` lexeme (char ',')
patternTuple = do ps <- parens (commaSep patternExpr)
return $ case ps of { [p] -> p; _ -> ptuple ps }
patternCons :: Monad m => ParsecT [Char] u m Pattern
patternCons = do
p <- patternTerm
colon <- optionMaybe (char ':')
colon <- optionMaybe (char ':' <?> "more complicated pattern")
case colon of
Just ':' -> pcons p <$> patternExpr
Nothing -> return p
patternList :: Monad m => ParsecT [Char] u m Pattern
patternList = plist <$> listOf patternExpr
patternList = plist <$> braces (commaSep patternExpr)
patternTerm :: Monad m => ParsecT [Char] u m Pattern
patternTerm = patternTuple <|> patternList <|> patternBasic <?> "pattern"