module Parser where import Omega import Control.Applicative (Applicative(..)) import Control.Monad (liftM, ap, guard) import Data.Char {- Implementation based on ideas in Monadic Parser Combinators paper http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf -} -- Parser type takes input string and returns a list of possible parses newtype Parser a = Parser (String -> [(a, String)]) -- Necessary AMP additions for Parser instance instance Functor Parser where fmap = liftM instance Applicative Parser where pure a = Parser (\cs -> [(a,cs)]) (<*>) = ap -- Monad instance, generators use the first parser then apply f to the result instance Monad Parser where return = pure p >>= f = Parser (\cs -> concat [parse (f a) cs' | (a,cs') <- parse p cs]) -- parser deconstructor parse (Parser p) = p -- takes a string and splits on the first char or fails item :: Parser Char item = Parser (\cs -> case cs of "" -> [] (c:cs) -> [(c,cs)]) -- combines the results of 2 parsers on an input string -- shortcircuits on the first result returned or fails (+++) :: Parser a -> Parser a -> Parser a p +++ q = Parser (\cs -> case parse p cs ++ parse q cs of [] -> [] (x:xs) -> [x]) -- failure parser zerop = Parser (\cs -> []) -- parses an element and returns if they satisfy a predicate sat :: (Char -> Bool) -> Parser Char sat p = do {c <- item; if p c then return c else zerop} -- parses chars only char :: Char -> Parser Char char c = sat (c ==) -- parses a string of chars string :: String -> Parser String string = mapM char -- parses 0 or more elements many :: Parser a -> Parser [a] many p = many1 p +++ return [] -- parses 1 or more elements many1 :: Parser a -> Parser [a] many1 p = do a <- p as <- many p return (a:as) -- parses 0 or more whitespace space :: Parser String space = many (sat isSpace) space1 :: Parser String space1 = many1 (sat isSpace) -- trims whitespace between an expression spaces :: Parser a -> Parser a spaces p = do space x <- p space return x -- parses a single string symb :: String -> Parser String symb = string -- apply a parser to a string apply :: Parser a -> String -> [(a,String)] apply p = parse (do {space; p}) keywords = ["let", "lett", "=", ".", ":", "::", "Nat", "z", "s"] -- 1 or more chars str :: Parser String str = do s <- many1 $ sat isLower if elem s keywords then zerop else return s -- 1 or more chars strT :: Parser String strT = do s <- many1 $ sat (\x -> isUpper x && isAlpha x) if elem s keywords then zerop else return s -- left recursion chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a p `chainl1` op = do {a <- p; rest a} where rest a = (do f <- op b <- p rest (f a b)) +++ return a -- bracket parses away brackets as you'd expect bracket :: Parser a -> Parser a bracket p = do symb "(" x <- p symb ")" return x -- Proper kinds are "*" packaged up kindVar = do symb "*" return KVar -- arrow kinds are kinds surrounded by arrows kindArr = do x <- spaces $ kindExpr symb "=>" y <- spaces $ kindTerm return $ KArr x y -- top level CFG for kinds kindTerm = kindArr +++ kindExpr -- second level CFG for kinds kindExpr = (bracket kindTerm) +++ kindVar -- abstraction allows escaped backslash or lambda typeLam = do spaces $ identifier lambdas x <- strT spaces (symb "::") t <- kindTerm spaces (symb ".") e <- spaces $ typeTerm return $ TAbs x t e -- arrow type parser typeArr = do x <- spaces $ type2 symb "->" y <- spaces $ type3 return $ TArr x y -- app has zero or more spaces typeApp = chainl1 type4 $ do space1 return $ TApp -- type vars are Strings typeVar = do x <- strT return $ TVar x -- type vars are "o" packaged up typeNat = do symb "Nat" return TNat -- top level CFG for arrow types are "(X -> Y)" packaged up typeTerm = typeLam +++ type2 type2 = typeArr +++ type3 type3 = typeApp +++ type4 -- bottom level of cfg for types type4 = (bracket typeTerm) +++ typeNat +++ typeVar -- parser for term variables termVar = do x <- str return $ Var x zero = do char 'z' return Zero succ = do char 's' return Succ -- abstraction allows escaped backslash or lambda lambdas = ['\x03bb','\\', 'λ'] lam = do spaces $ identifier lambdas x <- str spaces (symb ":") t <- typeTerm spaces (symb ".") e <- spaces term return $ Abs x t e -- app has zero or more spaces app = chainl1 expr $ do space1 return $ App -- parser for let expressions pLet = do space symb "let" space1 v <- str spaces $ symb "=" t <- term return (v, Left t) -- parser for type let expressions pTypeLet = do space symb "lett" space1 v <- strT spaces $ symb "=" t <- typeTerm return (v,Right t) --right signify type let pTerm = do t <- spaces $ term return ("", Left t) -- expression follows CFG form with bracketing convention expr = (bracket term) +++ termVar +++ zero +++ Parser.succ -- top level of CFG Grammar term = lam +++ app -- identifies key words identifier :: [Char] -> Parser Char identifier xs = do x <- sat (\x -> elem x xs) return x -- Kinds, consisting of proper types (*), or combinations of kinds data K = KVar | KArr K K deriving (Eq, Ord) -- Simple show instance instance Show K where show KVar = "*" show (KArr a b) = paren (iskArr a) (show a) ++ "=>" ++ show b paren :: Bool -> String -> String paren True x = "(" ++ x ++ ")" paren False x = x iskArr :: K -> Bool iskArr (KArr _ _) = True iskArr _ = False -- Omega Types, type variables are strings as it's easy to rename -- Unit/Top is used as the final type, meant to represent Object -- records are lists of variables and types (todo make this Set) data T = TVar String | TArr T T | TAbs String K T | TApp T T | TNat deriving (Eq, Ord) -- show implementation, uses Unicode for Unit -- uses bracketing convention for types instance Show T where show (TVar x) = x show (TNat) = "Nat" show (TArr a b) = paren (isArr a) (show a) ++ "->" ++ show b show (TAbs x k t)= "\x03bb" ++ x ++ "::" ++ show k ++ "." ++ show t show (TApp t1 t2)= paren (isTAbs t1) (show t1) ++ ' ' : paren (isTAbs t2 || isTApp t2) (show t2) isTAbs :: T -> Bool isTAbs (TAbs _ _ _) = True isTAbs _ = False isTApp :: T -> Bool isTApp (TApp _ _) = True isTApp _ = False isArr :: T -> Bool isArr (TArr _ _) = True isArr _ = False -- Omega Term -- variables are strings -- Abstractions carry the type Church style\ data OTerm = Var String | Abs String T OTerm | App OTerm OTerm | Zero | Succ deriving Ord -- alpha termEqualityalence of terms uses instance Eq OTerm where t1 == t2 = termEquality (t1, t2) (M.empty, M.empty) 0 -- checks for equality of terms, has a map (term, id) for each variable -- each abstraction adds vars to the map and increments the id -- also checks that each term is identical -- variable occurrence checks for ocurrences in t1 and t2 using the logic: -- if both bound, check that s is same in both maps -- if neither is bound, check literal equality -- if bound t1 XOR bound t2 == true then False -- application recursively checks both the LHS and RHS termEquality :: (OTerm, OTerm) -> (Map String Int, Map String Int) -> Int -> Bool termEquality (Var x, Var y) (m1, m2) s = case M.lookup x m1 of Just a -> case M.lookup y m2 of Just b -> a == b _ -> False _ -> x == y termEquality (Abs x t1 l1, Abs y t2 l2) (m1, m2) s = t1 == t2 && termEquality (l1, l2) (m1', m2') (s+1) where m1' = M.insert x s m1 m2' = M.insert y s m2 termEquality (App a1 b1, App a2 b2) c s = termEquality (a1, a2) c s && termEquality (b1, b2) c s termEquality (Succ, Succ) _ _ = True termEquality (Zero, Zero) _ _ = True termEquality _ _ _ = False -- show implementation for Omega terms -- uses bracketing convention for terms instance Show OTerm where show (Zero) = "z" show (Succ) = "s" show (Var x) = x show (App t1 t2) = paren (isAbs t1) (show t1) ++ ' ' : paren (isAbs t2 || isApp t2 || isSucc t2) (show t2) show (Abs x t l1) = "\x03bb" ++ x ++ ":" ++ show t ++ "." ++ show l1 isSucc :: OTerm -> Bool isSucc Succ = True isSucc _ = False isAbs :: OTerm -> Bool isAbs (Abs _ _ _) = True isAbs _ = False isApp :: OTerm -> Bool isApp (App _ _) = True isApp _ = False