Last active
August 22, 2018 18:32
-
-
Save lukeg101/ac1f2319e965f600b2e8b45a372f5fda to your computer and use it in GitHub Desktop.
Monadic Parser Combinators for the Lambda Omega Simply Typed Lambda Calculus with Type Operators
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module Parser where | |
| import Omega | |
| import Control.Applicative (Applicative(..)) | |
| import Control.Monad (liftM, ap, guard) | |
| import Data.Char | |
| {- | |
| Implementation based on ideas in Monadic Parser Combinators paper | |
| http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf | |
| -} | |
| -- Parser type takes input string and returns a list of possible parses | |
| newtype Parser a = Parser (String -> [(a, String)]) | |
| -- Necessary AMP additions for Parser instance | |
| instance Functor Parser where | |
| fmap = liftM | |
| instance Applicative Parser where | |
| pure a = Parser (\cs -> [(a,cs)]) | |
| (<*>) = ap | |
| -- Monad instance, generators use the first parser then apply f to the result | |
| instance Monad Parser where | |
| return = pure | |
| p >>= f = Parser (\cs -> concat [parse (f a) cs' | (a,cs') <- parse p cs]) | |
| -- parser deconstructor | |
| parse (Parser p) = p | |
| -- takes a string and splits on the first char or fails | |
| item :: Parser Char | |
| item = Parser (\cs -> case cs of | |
| "" -> [] | |
| (c:cs) -> [(c,cs)]) | |
| -- combines the results of 2 parsers on an input string | |
| -- shortcircuits on the first result returned or fails | |
| (+++) :: Parser a -> Parser a -> Parser a | |
| p +++ q = Parser (\cs -> case parse p cs ++ parse q cs of | |
| [] -> [] | |
| (x:xs) -> [x]) | |
| -- failure parser | |
| zerop = Parser (\cs -> []) | |
| -- parses an element and returns if they satisfy a predicate | |
| sat :: (Char -> Bool) -> Parser Char | |
| sat p = do {c <- item; if p c then return c else zerop} | |
| -- parses chars only | |
| char :: Char -> Parser Char | |
| char c = sat (c ==) | |
| -- parses a string of chars | |
| string :: String -> Parser String | |
| string = mapM char | |
| -- parses 0 or more elements | |
| many :: Parser a -> Parser [a] | |
| many p = many1 p +++ return [] | |
| -- parses 1 or more elements | |
| many1 :: Parser a -> Parser [a] | |
| many1 p = do | |
| a <- p | |
| as <- many p | |
| return (a:as) | |
| -- parses 0 or more whitespace | |
| space :: Parser String | |
| space = many (sat isSpace) | |
| space1 :: Parser String | |
| space1 = many1 (sat isSpace) | |
| -- trims whitespace between an expression | |
| spaces :: Parser a -> Parser a | |
| spaces p = do | |
| space | |
| x <- p | |
| space | |
| return x | |
| -- parses a single string | |
| symb :: String -> Parser String | |
| symb = string | |
| -- apply a parser to a string | |
| apply :: Parser a -> String -> [(a,String)] | |
| apply p = parse (do {space; p}) | |
| keywords = ["let", "lett", "=", ".", ":", "::", "Nat", "z", "s"] | |
| -- 1 or more chars | |
| str :: Parser String | |
| str = do | |
| s <- many1 $ sat isLower | |
| if elem s keywords then zerop else return s | |
| -- 1 or more chars | |
| strT :: Parser String | |
| strT = do | |
| s <- many1 $ sat (\x -> isUpper x && isAlpha x) | |
| if elem s keywords then zerop else return s | |
| -- left recursion | |
| chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a | |
| p `chainl1` op = do {a <- p; rest a} | |
| where | |
| rest a = (do | |
| f <- op | |
| b <- p | |
| rest (f a b)) +++ return a | |
| -- bracket parses away brackets as you'd expect | |
| bracket :: Parser a -> Parser a | |
| bracket p = do | |
| symb "(" | |
| x <- p | |
| symb ")" | |
| return x | |
| -- Proper kinds are "*" packaged up | |
| kindVar = do | |
| symb "*" | |
| return KVar | |
| -- arrow kinds are kinds surrounded by arrows | |
| kindArr = do | |
| x <- spaces $ kindExpr | |
| symb "=>" | |
| y <- spaces $ kindTerm | |
| return $ KArr x y | |
| -- top level CFG for kinds | |
| kindTerm = kindArr +++ kindExpr | |
| -- second level CFG for kinds | |
| kindExpr = (bracket kindTerm) +++ kindVar | |
| -- abstraction allows escaped backslash or lambda | |
| typeLam = do | |
| spaces $ identifier lambdas | |
| x <- strT | |
| spaces (symb "::") | |
| t <- kindTerm | |
| spaces (symb ".") | |
| e <- spaces $ typeTerm | |
| return $ TAbs x t e | |
| -- arrow type parser | |
| typeArr = do | |
| x <- spaces $ type2 | |
| symb "->" | |
| y <- spaces $ type3 | |
| return $ TArr x y | |
| -- app has zero or more spaces | |
| typeApp = chainl1 type4 $ do | |
| space1 | |
| return $ TApp | |
| -- type vars are Strings | |
| typeVar = do | |
| x <- strT | |
| return $ TVar x | |
| -- type vars are "o" packaged up | |
| typeNat = do | |
| symb "Nat" | |
| return TNat | |
| -- top level CFG for arrow types are "(X -> Y)" packaged up | |
| typeTerm = typeLam +++ type2 | |
| type2 = typeArr +++ type3 | |
| type3 = typeApp +++ type4 | |
| -- bottom level of cfg for types | |
| type4 = (bracket typeTerm) | |
| +++ typeNat | |
| +++ typeVar | |
| -- parser for term variables | |
| termVar = do | |
| x <- str | |
| return $ Var x | |
| zero = do | |
| char 'z' | |
| return Zero | |
| succ = do | |
| char 's' | |
| return Succ | |
| -- abstraction allows escaped backslash or lambda | |
| lambdas = ['\x03bb','\\', 'λ'] | |
| lam = do | |
| spaces $ identifier lambdas | |
| x <- str | |
| spaces (symb ":") | |
| t <- typeTerm | |
| spaces (symb ".") | |
| e <- spaces term | |
| return $ Abs x t e | |
| -- app has zero or more spaces | |
| app = chainl1 expr $ do | |
| space1 | |
| return $ App | |
| -- parser for let expressions | |
| pLet = do | |
| space | |
| symb "let" | |
| space1 | |
| v <- str | |
| spaces $ symb "=" | |
| t <- term | |
| return (v, Left t) | |
| -- parser for type let expressions | |
| pTypeLet = do | |
| space | |
| symb "lett" | |
| space1 | |
| v <- strT | |
| spaces $ symb "=" | |
| t <- typeTerm | |
| return (v,Right t) --right signify type let | |
| pTerm = do | |
| t <- spaces $ term | |
| return ("", Left t) | |
| -- expression follows CFG form with bracketing convention | |
| expr = (bracket term) +++ termVar | |
| +++ zero +++ Parser.succ | |
| -- top level of CFG Grammar | |
| term = lam +++ app | |
| -- identifies key words | |
| identifier :: [Char] -> Parser Char | |
| identifier xs = do | |
| x <- sat (\x -> elem x xs) | |
| return x | |
| -- Kinds, consisting of proper types (*), or combinations of kinds | |
| data K | |
| = KVar | |
| | KArr K K | |
| deriving (Eq, Ord) | |
| -- Simple show instance | |
| instance Show K where | |
| show KVar = "*" | |
| show (KArr a b) = paren (iskArr a) (show a) ++ "=>" ++ show b | |
| paren :: Bool -> String -> String | |
| paren True x = "(" ++ x ++ ")" | |
| paren False x = x | |
| iskArr :: K -> Bool | |
| iskArr (KArr _ _) = True | |
| iskArr _ = False | |
| -- Omega Types, type variables are strings as it's easy to rename | |
| -- Unit/Top is used as the final type, meant to represent Object | |
| -- records are lists of variables and types (todo make this Set) | |
| data T | |
| = TVar String | |
| | TArr T T | |
| | TAbs String K T | |
| | TApp T T | |
| | TNat | |
| deriving (Eq, Ord) | |
| -- show implementation, uses Unicode for Unit | |
| -- uses bracketing convention for types | |
| instance Show T where | |
| show (TVar x) = x | |
| show (TNat) = "Nat" | |
| show (TArr a b) = paren (isArr a) (show a) ++ "->" ++ show b | |
| show (TAbs x k t)= | |
| "\x03bb" ++ x ++ "::" ++ show k ++ "." ++ show t | |
| show (TApp t1 t2)= | |
| paren (isTAbs t1) (show t1) ++ ' ' : paren (isTAbs t2 || isTApp t2) (show t2) | |
| isTAbs :: T -> Bool | |
| isTAbs (TAbs _ _ _) = True | |
| isTAbs _ = False | |
| isTApp :: T -> Bool | |
| isTApp (TApp _ _) = True | |
| isTApp _ = False | |
| isArr :: T -> Bool | |
| isArr (TArr _ _) = True | |
| isArr _ = False | |
| -- Omega Term | |
| -- variables are strings | |
| -- Abstractions carry the type Church style\ | |
| data OTerm | |
| = Var String | |
| | Abs String T OTerm | |
| | App OTerm OTerm | |
| | Zero | |
| | Succ | |
| deriving Ord | |
| -- alpha termEqualityalence of terms uses | |
| instance Eq OTerm where | |
| t1 == t2 = termEquality (t1, t2) (M.empty, M.empty) 0 | |
| -- checks for equality of terms, has a map (term, id) for each variable | |
| -- each abstraction adds vars to the map and increments the id | |
| -- also checks that each term is identical | |
| -- variable occurrence checks for ocurrences in t1 and t2 using the logic: | |
| -- if both bound, check that s is same in both maps | |
| -- if neither is bound, check literal equality | |
| -- if bound t1 XOR bound t2 == true then False | |
| -- application recursively checks both the LHS and RHS | |
| termEquality :: (OTerm, OTerm) | |
| -> (Map String Int, Map String Int) | |
| -> Int | |
| -> Bool | |
| termEquality (Var x, Var y) (m1, m2) s = case M.lookup x m1 of | |
| Just a -> case M.lookup y m2 of | |
| Just b -> a == b | |
| _ -> False | |
| _ -> x == y | |
| termEquality (Abs x t1 l1, Abs y t2 l2) (m1, m2) s = | |
| t1 == t2 && termEquality (l1, l2) (m1', m2') (s+1) | |
| where | |
| m1' = M.insert x s m1 | |
| m2' = M.insert y s m2 | |
| termEquality (App a1 b1, App a2 b2) c s = | |
| termEquality (a1, a2) c s && termEquality (b1, b2) c s | |
| termEquality (Succ, Succ) _ _ = True | |
| termEquality (Zero, Zero) _ _ = True | |
| termEquality _ _ _ = False | |
| -- show implementation for Omega terms | |
| -- uses bracketing convention for terms | |
| instance Show OTerm where | |
| show (Zero) = "z" | |
| show (Succ) = "s" | |
| show (Var x) = x | |
| show (App t1 t2) = | |
| paren (isAbs t1) (show t1) ++ ' ' : paren (isAbs t2 || isApp t2 || isSucc t2) (show t2) | |
| show (Abs x t l1) = "\x03bb" ++ x ++ ":" ++ show t ++ "." ++ show l1 | |
| isSucc :: OTerm -> Bool | |
| isSucc Succ = True | |
| isSucc _ = False | |
| isAbs :: OTerm -> Bool | |
| isAbs (Abs _ _ _) = True | |
| isAbs _ = False | |
| isApp :: OTerm -> Bool | |
| isApp (App _ _) = True | |
| isApp _ = False |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment