Back to Index Page

TinyAPL part 4: Finally, Parsing!

#tinyapl#apl#haskell

Madeline Vergani

Almost four months after the last post, I finally figured out how to parse APL!

As usual, this post is also available as a GitHub repo.

Proper Show for errors

Quick change that makes the multiline errors returned while parsing look correct.

instance Show Error where
  show err = let
    (errorName, errorMessage) = case err of
      (DomainError msg) -> ("Domain error", msg)
      (LengthError msg) -> ("Length error", msg)
      (RankError msg) -> ("Rank error", msg)
      (NYIError msg) -> ("Not yet implemented!", msg)
      (SyntaxError msg) -> ("Syntax error", msg)
    hasNewline = '\n' `elem` errorMessage
    in if hasNewline
      then errorName ++ '\n' : errorMessage
      else errorName ++ ": " ++ errorMessage

Tokenization

For tokenization I'm using the Parsec parser combinator library (version 3.1.17.0).

The token structure is tree-like: some tokens (like the dfn token) contain other tokens as children.

Each token also has a SourcePos (from Parsec) that is used for displaying error messages.

data Token
  = TokenNumber (Complex Double) SourcePos
  | TokenChar String SourcePos
  | TokenString String SourcePos
  | TokenPrimArray Char SourcePos
  | TokenPrimFunction Char SourcePos
  | TokenPrimAdverb Char SourcePos
  | TokenPrimConjunction Char SourcePos
  | TokenDfn [[Token]] SourcePos
  | TokenDadv [[Token]] SourcePos
  | TokenDconj [[Token]] SourcePos
  | TokenArrayName String SourcePos
  | TokenFunctionName String SourcePos
  | TokenAdverbName String SourcePos
  | TokenConjunctionName String SourcePos
  | TokenArrayAssign String [Token] SourcePos
  | TokenFunctionAssign String [Token] SourcePos
  | TokenAdverbAssign String [Token] SourcePos
  | TokenConjunctionAssign String [Token] SourcePos
  | TokenParens [Token] SourcePos
  | TokenGuard [Token] [Token] SourcePos
  | TokenExit [Token] SourcePos
  deriving (Show)

tokenPos :: Token -> SourcePos
tokenPos (TokenNumber _ pos) = pos
tokenPos (TokenChar _ pos) = pos
tokenPos (TokenString _ pos) = pos
tokenPos (TokenPrimArray _ pos) = pos
tokenPos (TokenPrimFunction _ pos) = pos
tokenPos (TokenPrimAdverb _ pos) = pos
tokenPos (TokenPrimConjunction _ pos) = pos
tokenPos (TokenDfn _ pos) = pos
tokenPos (TokenDadv _ pos) = pos
tokenPos (TokenDconj _ pos) = pos
tokenPos (TokenArrayName _ pos) = pos
tokenPos (TokenFunctionName _ pos) = pos
tokenPos (TokenAdverbName _ pos) = pos
tokenPos (TokenConjunctionName _ pos) = pos
tokenPos (TokenArrayAssign _ _ pos) = pos
tokenPos (TokenFunctionAssign _ _ pos) = pos
tokenPos (TokenAdverbAssign _ _ pos) = pos
tokenPos (TokenConjunctionAssign _ _ pos) = pos
tokenPos (TokenParens _ pos) = pos
tokenPos (TokenGuard _ _ pos) = pos
tokenPos (TokenExit _ pos) = pos

An utility for creating syntax errors with indication to where in the source the error is, and an utility for turning Parsec ParseErrors into SyntaxErrors:

makeSyntaxError :: SourcePos -> String -> String -> Error
makeSyntaxError pos source msg = let
  line = sourceLine pos
  column = sourceColumn pos
  theLine = if null $ lines source then "" else lines source !! (line - 1)
  in SyntaxError $ theLine ++ "\n" ++ replicate (column - 1) ' ' ++ "^\n" ++ msg

makeParseError :: String -> ParseError -> Error
makeParseError source err = let
  pos = errorPos err
  msgs = tail $ showErrorMessages "or" "unknown parse error" "expecting" "unexpected" "end of input" $ errorMessages err
  in makeSyntaxError pos source msgs

And now, the actual tokenization function:

tokenize :: SourceName -> String -> Result [[Token]]
tokenize file source = first (makeParseError source) $ Text.Parsec.parse (sepBy1 bits separator <* eof) file source where
  withPos :: Parser (SourcePos -> a) -> Parser a
  withPos = (<**>) getPosition

  bit :: Parser Token
  bit = try bracketed <|> try conjunction <|> try adverb <|> try function <|> try array

  bits :: Parser [Token]
  bits = many1 bit

  bracketed :: Parser Token
  bracketed = withPos $ TokenParens <$> between (char $ fst G.parens) (char $ snd G.parens) bits

  separator :: Parser ()
  separator = oneOf [G.separator, '\n'] $> ()

Identifiers follow this rule: arrays start wiht a lowercase letter (or , , ), functions start with an uppercase letter (or , , , ), monadic operators start with an underscore, dyadic operators start and end with an underscore.

  arrayStart :: String
  arrayStart = G.delta : G.alpha : G.omega : ['a'..'z']
  functionStart :: String
  functionStart = G.deltaBar : G.alphaBar : G.omegaBar : G.del : ['A'..'Z']
  identifierRest :: String
  identifierRest = arrayStart ++ functionStart ++ ['0'..'9']
  array :: Parser Token
  array = between spaces spaces (try number <|> try charVec <|> try str <|> try arrayAssign <|> try (withPos $ TokenArrayName <$> arrayName) <|> primArray) where
    number :: Parser Token
    number = withPos $ TokenNumber <$> complex where

Parsing integers:

      sign :: Parser Double
      sign = option 1 (char G.negative $> (-1))

      natural :: Parser String
      natural = many1 digit

      integer :: Parser (Double, String)
      integer = liftA2 (,) sign natural

Floats:

      float :: Parser Double
      float = do
        (s, i) <- integer
        d <- option "" $ liftA2 (:) (char G.decimal) (many1 digit)
        return $ s * read (i ++ d)

      scientific :: Parser Double
      scientific = do
        f <- float
        option f $ do
          _ <- char G.exponent
          (es, ei) <- integer
          return $ f * 10 ** (es * read ei)

And complex numbers:

      complex :: Parser (Complex Double)
      complex = liftA2 (:+) scientific (option 0 (char G.imaginary *> scientific))

Parsing charcter literals:

    charVec :: Parser Token
    charVec = withPos $ TokenChar <$> between (char G.charDelimiter) (char G.charDelimiter) (many $ noneOf ['\''])

And strings, which support escapes with :

    str :: Parser Token
    str = withPos $ TokenString <$> between (char G.stringDelimiter) (char G.stringDelimiter) (many (escape <|> nonEscape)) where
      escape :: Parser Char
      escape = do
        _ <- char G.stringEscape
        c <- oneOf [G.stringDelimiter, G.stringEscape, 'n', 'r', 't']
        return $ fromJust $ lookup c
          [ (G.stringDelimiter, G.stringDelimiter)
          , (G.stringEscape, G.stringEscape)
          , ('n', '\n')
          , ('r', '\r')
          , ('t', '\t') ]

      nonEscape :: Parser Char
      nonEscape = noneOf [G.stringDelimiter, G.stringEscape]

Lastly, primitive arrays, array names and assignment to array names:

    primArray :: Parser Token
    primArray = withPos $ TokenPrimArray <$> oneOf G.arrays

    arrayName :: Parser String
    arrayName = singleton <$> oneOf [G.quad, G.quadQuote] <|> liftA2 (:) (oneOf arrayStart) (many $ oneOf identifierRest)

    arrayAssign :: Parser Token
    arrayAssign = withPos $ liftA2 TokenArrayAssign arrayName (between spaces spaces (char G.assign) *> bits)

Dfns (use for early exit):

  guard :: Parser Token
  guard = withPos $ liftA2 TokenGuard bits (char G.guard *> definedBits)

  exit :: Parser Token
  exit = withPos $ TokenExit <$> (char G.exit *> bits)

  definedBits :: Parser [Token]
  definedBits = many1 (try guard <|> try exit <|> bit)

  function :: Parser Token
  function = between spaces spaces (try dfn <|> try functionAssign <|> try (withPos $ TokenFunctionName <$> functionName) <|> primFunction) where
    dfn :: Parser Token
    dfn = withPos $ TokenDfn <$> (string [fst G.braces] *> sepBy1 definedBits separator <* string [snd G.braces])

Primitive functions, function names and function assignment:

    primFunction :: Parser Token
    primFunction = withPos $ TokenPrimFunction <$> oneOf G.functions

    functionName :: Parser String
    functionName = liftA2 (:) (oneOf functionStart) (many $ oneOf identifierRest)

    functionAssign :: Parser Token
    functionAssign = withPos $ liftA2 TokenFunctionAssign functionName (between spaces spaces (char G.assign) *> bits)

Operators are similar, except that they use _{/} for dadvs and _{/}_ for dconjs.

  adverb :: Parser Token
  adverb = between spaces spaces (try dadv <|> try adverbAssign <|> try (withPos $ TokenAdverbName <$> adverbName) <|> primAdverb) where
    dadv :: Parser Token
    dadv = withPos $ TokenDadv <$> (string [G.underscore, fst G.braces] *> sepBy1 definedBits separator <* string [snd G.braces])

    primAdverb :: Parser Token
    primAdverb = withPos $ TokenPrimAdverb <$> oneOf G.adverbs

    adverbName :: Parser String
    adverbName = liftA2 (:) (char G.underscore) (many1 $ oneOf identifierRest)

    adverbAssign :: Parser Token
    adverbAssign = withPos $ liftA2 TokenAdverbAssign adverbName (between spaces spaces (char G.assign) *> bits)

  conjunction :: Parser Token
  conjunction = between spaces spaces (try dconj <|> try conjunctionAssign <|> try (withPos $ TokenConjunctionName <$> conjunctionName) <|> primConjunction) where
    dconj :: Parser Token
    dconj = withPos $ TokenDconj <$> (string [G.underscore, fst G.braces] *> sepBy1 definedBits separator <* string [snd G.braces, G.underscore])

    primConjunction :: Parser Token
    primConjunction = withPos $ TokenPrimConjunction <$> oneOf G.conjunctions

    conjunctionName :: Parser String
    conjunctionName = liftA3 (\a b c -> a : b ++ [c]) (char G.underscore) (many1 $ oneOf identifierRest) (char G.underscore)

    conjunctionAssign :: Parser Token
    conjunctionAssign = withPos $ liftA2 TokenConjunctionAssign conjunctionName (between spaces spaces (char G.assign) *> bits)

Bunda-Gerth parsing

Turning tokens into a tree is done with a variation of Bunda-Gerth parsing, adapted to work with the token structure of TinyAPL.

First, each tree gets a category:

data Category
  = CatArray
  | CatFunction
  | CatAppliedFunction
  | CatAdverb
  | CatConjunction
  deriving (Enum, Bounded, Eq, Ord)

instance Show Category where
  show CatArray           = "array"
  show CatFunction        = "function"
  show CatAppliedFunction = "applied function"
  show CatAdverb          = "monadic operator"
  show CatConjunction     = "dyadic operator"

Next, the AST structure:

data Tree
  = Leaf { leafCategory :: Category, leafToken :: Token }
  | MonadCallBranch { monadCallBranchLeft :: Tree, monadCallBranchRight :: Tree }
  | DyadCallBranch { dyadCallBranchLeft :: Tree, dyadCallBranchRight :: Tree }
  | AdverbCallBranch { adverbCallBranchLeft :: Tree, adverbCallBranchRight :: Tree }
  | ConjunctionCallBranch { conjunctionCallBranchLeft :: Tree, conjunctionCallBranchRight :: Tree }
  | AssignBranch { assignmentBranchCategory :: Category, assignmentName :: String, assignmentValue :: Tree }
  | DefinedBranch { definedBranchCategory :: Category, definedBranchStatements :: [Tree] }
  | GuardBranch { guardBranchCheck :: Tree, guardBranchResult :: Tree }
  | ExitBranch { exitBranchResult :: Tree }

instance Show Tree where
  show tree = unlines $ go 0 tree where
    indentCount = 2
    go :: Int -> Tree -> [String]
    go i t = let indent = replicate (indentCount * i) ' ' in case t of
      (Leaf c l)                  -> [indent ++ show c ++ ": " ++ show l]
      (MonadCallBranch l r)       -> [indent ++ "monad call"] ++ go (i + 1) l ++ go (i + 1) r
      (DyadCallBranch l r)        -> [indent ++ "dyad left call"] ++ go (i + 1) l ++ go (i + 1) r
      (AdverbCallBranch l r)      -> [indent ++ "adverb call"] ++ go (i + 1) l ++ go (i + 1) r
      (ConjunctionCallBranch l r) -> [indent ++ "conjunction right call"] ++ go (i + 1) l ++ go (i + 1) r
      (AssignBranch c n v)        -> (indent ++ show c ++ " " ++ n ++ " ←") : go (i + 1) v
      (DefinedBranch c ts)        -> (indent ++ show c ++ " {") : concatMap (go (i + 1)) ts
      (GuardBranch ch res)        -> [indent ++ "guard"] ++ go (i + 1) ch ++ [indent ++ ":"] ++ go (i + 1) res
      (ExitBranch res)            -> (indent ++ "■") : go (i + 1) res

treeCategory :: Tree -> Category
treeCategory (Leaf c _)                  = c
treeCategory (MonadCallBranch _ _)       = CatArray
treeCategory (DyadCallBranch _ _)        = CatAppliedFunction
treeCategory (AdverbCallBranch _ _)      = CatFunction
treeCategory (ConjunctionCallBranch _ _) = CatAdverb
treeCategory (AssignBranch c _ _)        = c
treeCategory (DefinedBranch c _)         = c
treeCategory (GuardBranch _ _)           = CatArray
treeCategory (ExitBranch _)              = CatArray

In Bunda-Gerth, each pair of adjacent trees gets a binding strength (which by default is 0, indicating no bind can happen). bindingMap specifies the bind strengths and results:

bindingMap :: [((Category, Category), (Int, Tree -> Tree -> Tree))]
bindingMap =
  [ ((CatArray,           CatFunction), (2, DyadCallBranch))
  , ((CatFunction,        CatArray),    (1, MonadCallBranch))
  , ((CatAppliedFunction, CatArray),    (1, MonadCallBranch))
  , ((CatFunction,        CatAdverb),   (3, AdverbCallBranch))
  , ((CatArray,           CatAdverb),   (3, AdverbCallBranch))
  , ((CatConjunction,     CatArray),    (3, ConjunctionCallBranch))
  , ((CatConjunction,     CatFunction), (3, ConjunctionCallBranch)) ]

The bindPair function is the core of Bunda-Gerth: it finds the next pair to bind (the first pair with highest precedence) and joins the tree.

pairs :: [Tree] -> [(Int, Tree -> Tree -> Tree)]
pairs = mapAdjacent $ fromMaybe (0, undefined) .: (curry (`lookup` bindingMap) `on` treeCategory)

bindPair :: [Tree] -> Result [Tree]
bindPair [] = err $ SyntaxError "Bind empty array"
bindPair x@[_] = pure x
bindPair xs = let

First, we find all binding strengths and results.

  (sts, trees) = unzip $ pairs xs

Next, we find the maximum bind and its index.

  maxBind = maximum sts
  nextBind = fromJust $ maxBind `elemIndex` sts
  tree = trees !! nextBind

Lastly, we check that the bind is actually valid (not with strength 0) and transform the tree list.

  indexed = zip [0..] xs
  in if maxBind == 0 then err $ SyntaxError "No binding found" else pure $ mapMaybe (\(idx, el) ->
    if idx == nextBind then Just $ tree el $ xs !! (idx + 1)
    else if idx == nextBind + 1 then Nothing
    else Just el) indexed

Now we just need to apply the transformation for each pair in the tree list.

bindAll :: [Tree] -> Result Tree
bindAll [] = err $ SyntaxError "Bind empty array"
bindAll [x] = pure x
bindAll xs = bindPair xs >>= bindAll

Finally, we need to turn Tokens into Trees that the binder can consume.

categorize :: SourceName -> String -> Result [[Tree]]
categorize name source = tokenize name source >>= mapM categorizeTokens where
  categorizeTokens :: [Token] -> Result [Tree]
  categorizeTokens = mapM tokenToTree

  categorizeAndBind :: [Token] -> Result Tree
  categorizeAndBind = categorizeTokens >=> bindAll

  requireOfCategory :: Category -> (Category -> Error) -> Tree -> Result Tree
  requireOfCategory cat msg tree | treeCategory tree == cat = pure tree
                                 | otherwise                = err $ msg $ treeCategory tree

Most tokens are Leafs, and need no special processing.

  tokenToTree :: Token -> Result Tree
  tokenToTree num@(TokenNumber _ _)                = return $ Leaf CatArray num
  tokenToTree ch@(TokenChar _ _)                   = return $ Leaf CatArray ch
  tokenToTree str@(TokenString _ _)                = return $ Leaf CatArray str
  tokenToTree arr@(TokenPrimArray _ _)             = return $ Leaf CatArray arr
  tokenToTree fn@(TokenPrimFunction _ _)           = return $ Leaf CatFunction fn
  tokenToTree adv@(TokenPrimAdverb _ _)            = return $ Leaf CatAdverb adv
  tokenToTree conj@(TokenPrimConjunction _ _)      = return $ Leaf CatConjunction conj
  tokenToTree arr@(TokenArrayName _ _)             = return $ Leaf CatArray arr
  tokenToTree fn@(TokenFunctionName _ _)           = return $ Leaf CatFunction fn
  tokenToTree adv@(TokenAdverbName _ _)            = return $ Leaf CatAdverb adv
  tokenToTree conj@(TokenConjunctionName _ _)      = return $ Leaf CatConjunction conj
  tokenToTree (TokenParens ts pos)                 = categorizeAndBind ts

Next, defined functions and operators.

  defined :: Category -> String -> [[Token]] -> SourcePos -> Result Tree
  defined cat name statements pos = do
    ss <- mapM categorizeAndBind statements
    if null ss then err $ makeSyntaxError pos source $ "Invalid empty " ++ name
    else if treeCategory (last ss) /= CatArray then err $ makeSyntaxError (tokenPos $ head $ last statements) source $ "Invalid " ++ name ++ ": last statement must be an array"
    else Right $ DefinedBranch cat ss

  tokenToTree (TokenDfn statements pos)            = defined CatFunction "dfn" statements pos
  tokenToTree (TokenDadv statements pos)           = defined CatAdverb "dadv" statements pos
  tokenToTree (TokenDconj statements pos)          = defined CatConjunction "dconj" statements pos

Assignments:

  assignment :: Category -> String -> [Token] -> SourcePos -> Result Tree
  assignment cat name ts pos = AssignBranch cat name <$> (categorizeAndBind ts >>=
    requireOfCategory cat (\c -> makeSyntaxError pos source $ "Invalid assignment of " ++ show c ++ " to " ++ show cat ++ " name"))

  tokenToTree (TokenArrayAssign name ts pos)       = assignment CatArray name ts pos
  tokenToTree (TokenFunctionAssign name ts pos)    = assignment CatFunction name ts pos
  tokenToTree (TokenAdverbAssign name ts pos)      = assignment CatAdverb name ts pos
  tokenToTree (TokenConjunctionAssign name ts pos) = assignment CatConjunction name ts pos

And finally, guards and exit statements.

  tokenToTree (TokenGuard check result pos)        = do
    c <- categorizeAndBind check >>= requireOfCategory CatArray (\c -> makeSyntaxError (tokenPos $ head check) source $ "Invalid guard of type " ++ show c ++ ", array required")
    r <- categorizeAndBind result
    return $ GuardBranch c r
  tokenToTree (TokenExit result pos)               = ExitBranch <$> (categorizeAndBind result >>= requireOfCategory CatArray (\c -> makeSyntaxError (tokenPos $ head result) source $ "Invalid exit statement of type " ++ show c ++ ", array required"))

To tie everything together, a parse function that goes from string to Tree.

parse :: SourceName -> String -> Result [Tree]
parse name = categorize name >=> mapM bindAll

Scopes

Functions and operators now need access to variables, which are captured in the Scope structure:

data Scope = Scope
  { scopeArrays :: [(String, Array)]
  , scopeFunctions :: [(String, Function)]
  , scopeAdverbs :: [(String, Adverb)]
  , scopeConjunctions :: [(String, Conjunction)]
  , scopeParent :: Maybe Scope }
  deriving (Show)

Scopes support looking up a name:

scopeLookupArray :: String -> Scope -> Maybe Array
scopeLookupArray name sc =
  lookup name (scopeArrays sc) <|> (scopeParent sc >>= scopeLookupArray name)

scopeLookupFunction :: String -> Scope -> Maybe Function
scopeLookupFunction name sc =
  lookup name (scopeFunctions sc) <|> (scopeParent sc >>= scopeLookupFunction name)

scopeLookupAdverb :: String -> Scope -> Maybe Adverb
scopeLookupAdverb name sc =
  lookup name (scopeAdverbs sc) <|> (scopeParent sc >>= scopeLookupAdverb name)

scopeLookupConjunction :: String -> Scope -> Maybe Conjunction
scopeLookupConjunction name sc =
  lookup name (scopeConjunctions sc) <|> (scopeParent sc >>= scopeLookupConjunction name)

And storing a value in a name:

scopeUpdateArray :: String -> Array -> Scope -> Scope
scopeUpdateArray name val sc = sc{ scopeArrays = update name val (scopeArrays sc) }

scopeUpdateFunction :: String -> Function -> Scope -> Scope
scopeUpdateFunction name val sc = sc{ scopeFunctions = update name val (scopeFunctions sc) }

scopeUpdateAdverb :: String -> Adverb -> Scope -> Scope
scopeUpdateAdverb name val sc = sc{ scopeAdverbs = update name val (scopeAdverbs sc) }

scopeUpdateConjunction :: String -> Conjunction -> Scope -> Scope
scopeUpdateConjunction name val sc = sc{ scopeConjunctions = update name val (scopeConjunctions sc) }

We need an environment for functions and operators to work in, with access to the current scope. I chose to use the StateT monad transformer on the ExceptT monad transformer (both from mtl version 2.3.1) on IO.

type St = StateT Scope (ExceptT Error IO)

Now functions and operators need to work in the St monad:

data Function
  = DefinedFunction
    { dfnMonad :: Maybe (Array -> St Array)
    , dfnDyad  :: Maybe (Array -> Array -> St Array)
    , dfnRepr  :: String }
  -- ...

callMonad :: Function -> Array -> St Array
callDyad :: Function -> Array -> Array -> St Array

data Adverb = Adverb
  { adverbOnArray    :: Maybe (Array    -> St Function)
  , adverbOnFunction :: Maybe (Function -> St Function)
  , adverbRepr       :: String }

callOnArray :: Adverb -> Array -> St Function
callOnFunction :: Adverb -> Function -> St Function

data Conjunction = Conjunction
  { conjOnArrayArray       :: Maybe (Array    -> Array    -> St Function)
  , conjOnArrayFunction    :: Maybe (Array    -> Function -> St Function)
  , conjOnFunctionArray    :: Maybe (Function -> Array    -> St Function)
  , conjOnFunctionFunction :: Maybe (Function -> Function -> St Function)
  , conjRepr               :: String }

callOnArrayAndArray :: Conjunction -> Array -> Array -> St Function
callOnArrayAndFunction :: Conjunction -> Array -> Function -> St Function
callOnFunctionAndArray :: Conjunction -> Function -> Array -> St Function
callOnFunctionAndFunction :: Conjunction -> Function -> Function -> St Function

All functions in the Primitives module need to be changed to work under the St monad. Since they're all pure functions, I created a wrapper function:

pureFunction m d = DefinedFunction
  ((\f x -> case f x of
    Left  e -> throwError e
    Right r -> return r) <$> m)
  ((\f x y -> case f x y of
    Left  e -> throwError e
    Right r -> return r) <$> d)

All definitions inside Primitives were changed to call pureFunction instead.

Interpreting

While interpreting, all four types of values (arrays, functions, monadic and dyadic operators) need to be passed around as an unit. Value does exactly that:

data Value
  = VArray Array
  | VFunction Function
  | VAdverb Adverb
  | VConjunction Conjunction

instance Show Value where
  show (VArray arr)        = show arr
  show (VFunction fn)      = show fn
  show (VAdverb adv)       = show adv
  show (VConjunction conj) = show conj

unwrapArray :: Error -> Value -> St Array
unwrapArray _ (VArray val) = return val
unwrapArray e _            = throwError e

unwrapFunction :: Error -> Value -> St Function
unwrapFunction _ (VFunction val) = return val
unwrapFunction e _               = throwError e

unwrapAdverb :: Error -> Value -> St Adverb
unwrapAdverb _ (VAdverb val) = return val
unwrapAdverb e _             = throwError e

unwrapConjunction :: Error -> Value -> St Conjunction
unwrapConjunction _ (VConjunction val) = return val
unwrapConjunction e _                  = throwError e

Functions for looking up and updating scopes that work with Values:

scopeLookup :: String -> Scope -> Maybe Value
scopeLookup name sc = (VArray <$> scopeLookupArray name sc)
                  <|> (VFunction <$> scopeLookupFunction name sc)
                  <|> (VAdverb <$> scopeLookupAdverb name sc)
                  <|> (VConjunction <$> scopeLookupConjunction name sc)

scopeUpdate :: String -> Value -> Scope -> Scope
scopeUpdate name (VArray val) sc       = scopeUpdateArray name val sc
scopeUpdate name (VFunction val) sc    = scopeUpdateFunction name val sc
scopeUpdate name (VAdverb val) sc      = scopeUpdateAdverb name val sc
scopeUpdate name (VConjunction val) sc = scopeUpdateConjunction name val sc

A function for creating a child scope, and running code inside it:

inChildScope :: Monad m => [(String, Value)] -> StateT Scope m a -> Scope -> m a
inChildScope vals x parent = let
  child = foldr (\(name, val) sc -> scopeUpdate name val sc) (Scope [] [] [] [] (Just parent)) vals
  in evalStateT x child

Functions for interpreting a Tree, as well as the whole pipeline for strings to interpreted result (these call eval, which is defined later):

interpret :: Tree -> Scope -> ResultIO (Value, Scope)
interpret tree = runStateT (eval tree)

run :: String -> String -> Scope -> ResultIO (Value, Scope)
run file src scope = do
  trees <- except (parse file src)
  join $ foldlM (\last next -> interpret next . snd <$> last) (return (undefined, scope)) trees

And finally, the core of the interpreting step: eval.

eval :: Tree -> St Value

First, we interpret Leafs:

eval (Leaf _ tok)                   = evalLeaf tok

evalLeaf :: Token -> St Value

Numbers, character literals and strings simply wrap the leaf value into a VArray:

evalLeaf (TokenNumber num _)           = return $ VArray $ scalar $ Number num
evalLeaf (TokenChar [x] _)             = return $ VArray $ scalar $ Character x
evalLeaf (TokenChar xs _)              = return $ VArray $ vector $ Character <$> xs
evalLeaf (TokenString xs _)            = return $ VArray $ vector $ Character <$> xs

Lookup of primitives:

evalLeaf (TokenPrimArray n _)          =
  lift $ except $ maybeToEither (SyntaxError $ "Unknown primitive array " ++ [n]) $ VArray <$> lookup n P.arrays
evalLeaf (TokenPrimFunction n _)       =
  lift $ except $ maybeToEither (SyntaxError $ "Unknown primitive function " ++ [n]) $ VFunction <$> lookup n P.functions
evalLeaf (TokenPrimAdverb n _)         =
  lift $ except $ maybeToEither (SyntaxError $ "Unknown primitive adverb " ++ [n]) $ VAdverb <$> lookup n P.adverbs
evalLeaf (TokenPrimConjunction n _)    =
  lift $ except $ maybeToEither (SyntaxError $ "Unknown primitive conjunction " ++ [n]) $ VConjunction <$> lookup n P.conjunctions

Looking up array names has a special check for and

evalLeaf (TokenArrayName name _)
  | name == [G.quad]                   = do
    liftIO $ putStr $ G.quad : ": "
    liftIO $ hFlush stdout
    code <- liftIO getLine
    scope <- get
    (res, scope') <- lift $ run [G.quad] code scope
    put scope'
    return res
  | name == [G.quadQuote]              = do
    str <- liftIO getLine
    return $ VArray $ vector $ Character <$> str
  | otherwise                          =
    get >>= (lift . except . maybeToEither (SyntaxError $ "Variable " ++ name ++ " does not exist") . fmap VArray . scopeLookupArray name)

Other names can be directly looked up:

evalLeaf (TokenFunctionName name _)    =
  get >>= (lift . except . maybeToEither (SyntaxError $ "Variable " ++ name ++ " does not exist") . fmap VFunction . scopeLookupFunction name)
evalLeaf (TokenAdverbName name _)      =
  get >>= (lift . except . maybeToEither (SyntaxError $ "Variable " ++ name ++ " does not exist") . fmap VAdverb . scopeLookupAdverb name)
evalLeaf (TokenConjunctionName name _) =
  get >>= (lift . except . maybeToEither (SyntaxError $ "Variable " ++ name ++ " does not exist") . fmap VConjunction . scopeLookupConjunction name)
evalLeaf _                             = throwError $ DomainError "Invalid leaf type in evaluation"

Next, evaluation for function and operator applications:

eval (MonadCallBranch l r)          = do
  r' <- eval r
  l' <- eval l
  evalMonadCall l' r'
eval (DyadCallBranch l r)           = do
  r' <- eval r
  l' <- eval l
  evalDyadCall l' r'
eval (AdverbCallBranch l r)         = do
  r' <- eval r
  l' <- eval l
  evalAdverbCall l' r'
eval (ConjunctionCallBranch l r)    = do
  r' <- eval r
  l' <- eval l
  evalConjunctionCall l' r'

evalMonadCall :: Value -> Value -> St Value
evalMonadCall (VFunction fn) (VArray arr) = VArray <$> callMonad fn arr
evalMonadCall _ _                         = throwError $ DomainError "Invalid arguments to monad call evaluation"

evalDyadCall :: Value -> Value -> St Value
evalDyadCall (VArray arr) (VFunction f) =
  return $ VFunction $ DefinedFunction { dfnMonad = Just $ callDyad f arr, dfnDyad = Nothing, dfnRepr = "(" ++ show arr ++ show f ++ ")" }
evalDyadCall _ _                        = throwError $ DomainError "Invalid arguments to dyad call evaluation"

evalAdverbCall :: Value -> Value -> St Value
evalAdverbCall (VArray l) (VAdverb adv)    = VFunction <$> callOnArray adv l
evalAdverbCall (VFunction l) (VAdverb adv) = VFunction <$> callOnFunction adv l
evalAdverbCall _ _                         = throwError $ DomainError "Invalid arguments to adverb call evaluation"

evalConjunctionCall :: Value -> Value -> St Value
evalConjunctionCall (VConjunction conj) (VArray r)    =
  return $ VAdverb $ Adverb { adverbOnArray = Just (\x -> callOnArrayAndArray conj x r), adverbOnFunction = Just (\x -> callOnFunctionAndArray conj x r), adverbRepr = "(" ++ show conj ++ show r ++ ")" }
evalConjunctionCall (VConjunction conj) (VFunction r) =
  return $ VAdverb $ Adverb { adverbOnArray = Just (\x -> callOnArrayAndFunction conj x r), adverbOnFunction = Just (\x -> callOnFunctionAndFunction conj x r), adverbRepr = "(" ++ show conj ++ show r ++ ")" }
evalConjunctionCall _ _                               = throwError $ DomainError "Invalid arguments to conjunction call evaluation"

Then, evaluation for assignment:

eval (AssignBranch _ n val)         = eval val >>= evalAssign n

Just like for name lookup, there is a special case for and .

evalAssign :: String -> Value -> St Value
evalAssign name val
  | name == [G.quad] = do
    arr <- unwrapArray (DomainError "Cannot print non-array") val
    liftIO $ print arr
    return val
  | name == [G.quadQuote] = do
    arr <- unwrapArray (DomainError "Cannot print non-array") val
    liftIO $ hPutStr stderr $ show arr
    liftIO $ hFlush stderr
    return val
  | otherwise = do
    sc <- get
    put $ scopeUpdate name val sc
    return val

Finally, dfn/dop evaluation:

eval (DefinedBranch cat statements) = evalDefined statements cat
eval _                              = throwError $ DomainError "Invalid branch in evaluation"

evalDefined :: [Tree] -> Category -> St Value
evalDefined statements cat = let

First, a function that does evaluation inside a dfn context, which means that it also supports guards and exits:

  ev :: Tree -> St (Value, Bool)
  ev (GuardBranch check result) = do
    c <- eval check >>= unwrapArray (DomainError "Guard check not array") >>= lift . except . asScalar (DomainError "Guard check not scalar") >>= lift . except . asBool (DomainError "Guard check not boolean")
    if c then ev result
    else return (VArray $ Array [0, 0] [], False)
  ev (ExitBranch result) = (, True) <$> eval result
  ev other = (, False) <$> eval other

The Bool is used as an indicator of early return:

  runDefined :: [Tree] -> St Value
  runDefined [] = throwError $ DomainError "Eval empty dfn/dadv/dconj"
  runDefined [x] = fst <$> ev x
  runDefined (x:xs) = do
    (v, r) <- ev x
    if r then return v else runDefined xs

A helper function:

  run xs sc = lift (inChildScope xs (runDefined statements) sc) >>= unwrapArray (DomainError "Dfn must return an array")

Finally, the actual interpreting of dfns/dops:

  in do
    sc <- get
    case cat of
      CatArray -> throwError $ DomainError "Defined of type array?"
      CatFunction -> let
        dfn = VFunction (DefinedFunction
          { dfnRepr = "{...}"
          , dfnMonad = Just $ \x -> run [([G.omega], VArray x), ([G.del], dfn)] sc
          , dfnDyad = Just $ \x y -> run [([G.alpha], VArray x), ([G.omega], VArray y), ([G.del], dfn)] sc } )
        in return dfn
      CatAdverb -> let
        dadv = VAdverb (Adverb
          { adverbRepr = "_{...}"
          , adverbOnArray = Just $ \a -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}"
              , dfnMonad = Just $ \x -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del], dadv)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del], dadv)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn
          , adverbOnFunction = Just $ \a -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}"
              , dfnMonad = Just $ \x -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del], dadv)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del], dadv)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn } )
        in return dadv
      CatConjunction -> let
        dconj = VConjunction (Conjunction
          { conjRepr = "_{...}_"
          , conjOnArrayArray = Just $ \a b -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}_(" ++ show b ++ ")"
              , dfnMonad = Just $ \x -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.omega, G.omega], VArray b)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.omega, G.omega], VArray b)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn
          , conjOnArrayFunction = Just $ \a b -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}_(" ++ show b ++ ")"
              , dfnMonad = Just $ \x -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.omegaBar, G.omegaBar], VFunction b)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alpha, G.alpha], VArray a)
                , ([G.omegaBar, G.omegaBar], VFunction b)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn
          , conjOnFunctionArray = Just $ \a b -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}_(" ++ show b ++ ")"
              , dfnMonad = Just $ \x -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.omega, G.omega], VArray b)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.omega, G.omega], VArray b)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn
          , conjOnFunctionFunction = Just $ \a b -> let
            dfn = (DefinedFunction
              { dfnRepr = "(" ++ show a ++ ")_{...}_(" ++ show b ++ ")"
              , dfnMonad = Just $ \x -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.omegaBar, G.omegaBar], VFunction b)
                , ([G.omega], VArray x)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc
              , dfnDyad = Just $ \x y -> run
                [ ([G.alphaBar, G.alphaBar], VFunction a)
                , ([G.omegaBar, G.omegaBar], VFunction b)
                , ([G.alpha], VArray x)
                , ([G.omega], VArray y)
                , ([G.underscore, G.del, G.underscore], dconj)
                , ([G.del], VFunction dfn) ] sc } )
            in return dfn } )
        in return dconj

This code looks scary, but it's only doing one thing: creating DefinedFunctions that run in a child scope where the correct variables (, , ⍺⍺, ⍵⍵, ⍶⍶, ⍹⍹, , _∇, _∇_) are defined.

The TinyAPL REPL

Now that we can parse and interpret code, we need a way to actually run it!

runCode :: Bool -> String -> String -> Scope -> IO Scope
runCode output file code scope = do
  result <- runResult $ run file code scope
  case result of
    Left err -> hPrint stderr err $> scope
    Right (res, scope) -> if output then print res $> scope else return scope

repl :: Scope -> IO ()
repl scope = let
  go :: Scope -> IO Scope
  go scope = do
    putStr "> "
    hFlush stdout
    line <- getLine
    if line == "" then return scope
    else runCode True "<repl>" line scope >>= go
  in do
    putStrLn "TinyAPL REPL, empty line to exit"
    putStrLn "Supported primitives:"
    putStrLn $ "  " ++ unwords (singleton . fst <$> P.arrays)
    putStrLn $ "  " ++ unwords (singleton . fst <$> P.functions)
    putStrLn $ "  " ++ unwords (singleton . fst <$> P.adverbs)
    putStrLn $ "  " ++ unwords (singleton . fst <$> P.conjunctions)
    putStrLn "Supported features:"
    putStrLn $ "* dfns " ++ [fst G.braces] ++ "code" ++ [snd G.braces] ++ ", d-monadic-ops " ++ [G.underscore, fst G.braces] ++ "code" ++ [snd G.braces] ++ ", d-dyadic-ops " ++ [G.underscore, fst G.braces] ++ "code" ++ [snd G.braces, G.underscore]
    putStrLn $ "  " ++ [G.alpha] ++ " left argument, " ++ [G.omega] ++ " right argument,"
    putStrLn $ "  " ++ [G.alpha, G.alpha] ++ " left array operand, " ++ [G.alphaBar, G.alphaBar] ++ " left function operand, " ++ [G.omega, G.omega] ++ " right array operand, " ++ [G.omegaBar, G.omegaBar] ++ " right function operand,"
    putStrLn $ "  " ++ [G.del] ++ " recurse function, " ++ [G.underscore, G.del] ++ " recurse monadic op, " ++ [G.underscore, G.del, G.underscore] ++ " recurse dyadic op"
    putStrLn $ "  " ++ [G.exit] ++ " early exit, " ++ [G.guard] ++ " guard"
    putStrLn $ "  " ++ [G.separator] ++ " multiple statements"
    putStrLn $ "* numbers: " ++ [G.decimal] ++ " decimal separator, " ++ [G.negative] ++ " negative sign, " ++ [G.exponent] ++ " exponent notation, " ++ [G.imaginary] ++ " complex separator"
    putStrLn $ "* character literals: " ++ [G.charDelimiter] ++ "abc" ++ [G.charDelimiter]
    putStrLn $ "* string literals: " ++ [G.stringDelimiter] ++ "abc" ++ [G.stringDelimiter] ++ " with escapes using " ++ [G.stringEscape]
    putStrLn $ "* names: abc array, Abc function, _Abc monadic op, _Abc_ dyadic op, assignment with " ++ [G.assign]
    putStrLn $ "* get " ++ [G.quad] ++ " read evaluated input, get " ++ [G.quadQuote] ++ " read string input, set " ++ [G.quad] ++ " print with newline, set " ++ [G.quadQuote] ++ " print without newline"
    void $ go scope

main :: IO ()
main = do
  hSetEncoding stdout utf8
  hSetEncoding stderr utf8

The scope in which the file/REPL input is ran has some builtin entries, for convenience:

  let a = vector $ Number <$> [1, 2, -1]
  let b = vector $ Number <$> [5, 2.1, 3 :+ (-0.5)]

  let i = arrayReshaped [3, 3] $ Number <$> [ 1, 0, 0
                                            , 0, 1, 0
                                            , 0, 0, 1 ]

  let inc = BindRight P.plus (scalar $ Number 1)
  
  putStrLn "a"; print a
  putStrLn "b"; print b
  putStrLn "i"; print i
  putStrLn "I"; print inc

  let scope = Scope [("a", a), ("b", b), ("i", i)] [("I", inc)] [] [] Nothing
  args <- getArgs
  case args of
    []     -> repl scope
    [path] -> do
      code <- readFile path
      void $ runCode False path code scope
    _      -> do
      hPutStrLn stderr "Usage:"
      hPutStrLn stderr "tinyapl         Start a REPL"
      hPutStrLn stderr "tinyapl path    Run a file"

Floor, ceiling and round

Of course, no TinyAPL update would be complete without adding some primitives! This time it's just a few easy ones: floor, ceiling and round ().

-- as argument to floor = pureFunction
Just $ monadN2N' $ \(a :+ b) -> fromInteger (Prelude.floor a) :+ fromInteger (Prelude.floor b)
-- as argument to ceil = pureFunction
Just $ monadN2N' $ \(a :+ b) -> fromInteger (ceiling a) :+ fromInteger (ceiling b)
round = pureFunction (Just $ monadN2N' $ \(a :+ b) -> let
  r x = Prelude.floor $ x + 0.5
  in fromInteger (r a) :+ fromInteger (r b)) Nothing [G.round]

Note: Round doesn't use Prelude.round because that one rounds x.5 to even, and I wanted round above instead.