Back to Index Page

TinyAPL part 14: (Wrap Your Head Around) Dictionaries

#tinyapl#apl#haskell

Madeline Vergani

For a while I've been developing the concept of dictionaries, a sort of associative array that is actually an array and not a scalar like structs or Kap's maps. But first, as usual, some random primitives.

You can find all the code for this post on GitHub.

Leading Axis Agreement

Leading axis agreement adds a way of combining two arrays of differing rank with scalar functions.

boxyDyad :: MonadError Error m => (ScalarValue -> ScalarValue -> m ScalarValue) -> ScalarValue -> ScalarValue -> m ScalarValue
boxyDyad f (Box as) (Box bs) = Box <$> scalarDyad f as bs
boxyDyad f (Box as) b = Box <$> scalarDyad f as (scalar b)
boxyDyad f a (Box bs) = Box <$> scalarDyad f (scalar a) bs
boxyDyad f a b = f a b

scalarDyad :: MonadError Error m =>
  (ScalarValue -> ScalarValue -> m ScalarValue)
      -> Array ->       Array -> m Array
scalarDyad f a@(Array ash as) b@(Array bsh bs)
  | null ash && null bsh = let ([a'], [b']) = (as, bs) in scalar <$> f' a' b'
  | null ash = let [a'] = as in Array bsh <$> mapM (a' `f'`) bs
  | null bsh = let [b'] = bs in Array ash <$> mapM (`f'` b') as
  | ash == bsh = Array (arrayShape a) <$> zipWithM f' (arrayContents a) (arrayContents b)
  | ash `isPrefixOf` bsh || bsh `isPrefixOf` ash = fromMajorCells <$> zipWithM (scalarDyad f) (majorCells a) (majorCells b)
  | length ash /= length bsh = throwError $ RankError "Mismatched left and right argument ranks"
  | otherwise = throwError $ LengthError "Mismatched left and right argument shapes"
  where
    f' = boxyDyad f

Join

Join is the monad of Catenate and is basically a catenate reduction:

join :: MonadError Error m => [Noun] -> m Noun
join = fold (catenate `after` first) (vector [])

Deal

Deal ? selects a number of unique numbers in a range.

deal :: (MonadError Error m, MonadIO m) => Natural -> Natural -> m [Natural]
deal count max
  | count > max = throwError $ DomainError "Deal left must be less than or equal to right argument"
  | otherwise = do
    let go 0 _ = pure []
        go n xs = do
          index <- randomR (0, length xs - 1)
          let num = xs !! index
          (num :) <$> go (n - 1) (filter (/= num) xs)
    go count [0..max-1]

Partition, Group, Partitioned Enclose

Partition , Group and Partitioned Enclose are three related functions that all partiton an array in different ways.

group :: MonadError Error m => [Integer] -> [a] -> m [[a]]
group is xs = do
  let buckets = genericTake (1 + ((-1) `Prelude.max` maximum is)) $ Prelude.repeat []
  let go :: MonadError Error m => [[a]] -> [Integer] -> [a] -> m [[a]]
      go buckets _ [] = pure buckets
      go buckets (i:is) (x:xs)
        | i < 0 = go buckets is xs
        | otherwise = go ((genericTake i buckets) ++ [genericIndex buckets i `snoc` x] ++ (genericDrop (i + 1) buckets)) is xs
      go _ [] _ = throwError $ DomainError "Group left argument too short"
  go buckets is xs

partition :: MonadError Error m => [Natural] -> [a] -> m [[a]]
partition is xs = do
  let areNewPartitions = True : mapAdjacent (/=) is
  let ws = zip3 ((0 /=) <$> is) areNewPartitions xs
  pure $ Prelude.reverse $ fmap Prelude.reverse $ foldl' (\ps (keep, newPart, x) ->
    if Prelude.not keep then ps
    else if newPart then [x] : ps
    else (x : headPromise ps) : tailPromise ps) [] ws

partitionEnclose :: MonadError Error m => [Natural] -> [a] -> m [[a]]
partitionEnclose ms xs = pure $ Prelude.reverse $ fmap Prelude.reverse $ foldl' (\ps (co, x) ->
  if (co == Nothing || co == Just 0) && null ps then ps
  else if co == Nothing || co == Just 0 then (fromJust x : headPromise ps) : tailPromise ps
  else fromMaybe [] (Data.List.singleton <$> x) : genericTake (fromJust co - 1) (Prelude.repeat []) ++ ps) [] $ zipLongest ms xs

⎕Unicode

⎕Unicode is a primitive that converts between Unicode encodings. Its implementation is quite lengthy and annoying. Here are some highlights.

unicodeS :: MonadError Error m => ScalarValue -> m ScalarValue
unicodeS (Number y) = do
  let err = DomainError $ G.quad : "Unicode argument must be a natural or character"
  n <- asNat err y
  pure $ Character $ chr $ fromIntegral n
unicodeS (Character y) = pure $ Number $ (fromIntegral $ ord y) :+ 0
unicodeS _ = throwError $ DomainError $ G.quad : "Unicode argument must be a natural or character"

toUtf8 :: MonadError Error m => String -> m [Word8]
toUtf8 str = do
  let text = T.pack str
  let bs = T.encodeUtf8 text
  case S.runGet (getAllOf S.getWord8) bs of
    Right codes -> pure codes
    Left err -> throwError $ DomainError $ "Error in encoding: " ++ err

fromUtf8 :: (MonadError Error m, MonadIO m, MonadCatch m) => [Word8] -> m String
fromUtf8 codes = do
  let bs = S.runPut $ putAllOf S.putWord8 codes
  text <- handle (\(T.DecodeError e _) -> throwError $ DomainError $ "Error in decoding: " ++ e) $ liftIO $ evaluate (T.decodeUtf8With T.strictDecode bs)
  pure $ T.unpack text

-- same for utf16 and utf32

encode :: MonadError Error m => EncodeDecodeType -> String -> m [Natural]
encode Utf8 str = fmap (fromInteger . toInteger) <$> toUtf8 str
encode Utf16 str = fmap (fromInteger . toInteger) <$> toUtf16 str
encode Utf32 str = fmap (fromInteger . toInteger) <$> toUtf32 str

decode :: (MonadError Error m, MonadIO m, MonadCatch m) => EncodeDecodeType -> [Natural] -> m String
decode Utf8 codes = fromUtf8 $ fromInteger . toInteger <$> codes
decode Utf16 codes = fromUtf16 $ fromInteger . toInteger <$> codes
decode Utf32 codes = fromUtf32 $ fromInteger . toInteger <$> codes

Beginnings of ⎕file

⎕file is a struct containing file operations. For now, I've implemented ⎕file→Read and ⎕file→Write:

-- F.read and F.write force writing files as UTF-8

read' :: Noun -> St Noun
read' s = do
  let err = DomainError "read: argument must be a string path"
  path <- asString err s
  vector . fmap Character <$> F.read path

write' :: Noun -> Noun -> St Noun
write' s d = do
  let err = DomainError "write: arguments must be a string path and string data"
  path <- asString err s
  da <- asString err d
  F.write path da
  pure $ vector []

Case functions

I removed ⎕C from planning and introduced case overloads to Floor (lowercase), Ceiling (uppercase), Magnitude | (case fold) and Sign × (query case).

floor (Character y) = pure $ Character $ toLower y
ceil (Character y) = pure $ Character $ toUpper y
abs (Character y) = pure $ Character $ toLower $ toUpper y
sign (Character y) = pure $ Number $ if isUpperCase y then 1 else if isLowerCase y then -1 else 0

Dictionaries

The crux of all the changes is this:

data Noun
  = Array
    { arrayShape :: [Natural]
    , arrayContents :: [ScalarValue] }
  | Dictionary
    { dictKeys :: [ScalarValue]
    , dictValues :: [ScalarValue] }

Next was a very boring half an hour of changing every Array type to Noun, while spotting which functions only made sense for arrays (and putting errors in dictionary versions) and which required a dictionary implementation. Most of the changes are therefore very boring. Here are some highlights.

Parsing dictionaries isn't hard but a bit too boring to include here. The syntax is ⟨key: value ⋄ key2: value2⟩ and ⟨:⟩ for an empty dictionary (though there's also Empty Dictionary which is the dictionary friend of zilde)

Scalar functions on dictionaries

These operate on the values.

boxyMonad :: MonadError Error m => (ScalarValue -> m ScalarValue) -> ScalarValue -> m ScalarValue
boxyMonad f (Box xs) = Box <$> scalarMonad f xs
boxyMonad f x = f x

scalarMonad :: MonadError Error m =>
  (ScalarValue -> m ScalarValue)
       -> Noun -> m Noun
scalarMonad f (Array sh cs) = Array sh <$> mapM (boxyMonad f) cs
scalarMonad f (Dictionary ks vs) = Dictionary ks <$> mapM (boxyMonad f) vs

boxyDyad :: MonadError Error m => (ScalarValue -> ScalarValue -> m ScalarValue) -> ScalarValue -> ScalarValue -> m ScalarValue
boxyDyad f (Box as) (Box bs) = Box <$> scalarDyad f as bs
boxyDyad f (Box as) b = Box <$> scalarDyad f as (scalar b)
boxyDyad f a (Box bs) = Box <$> scalarDyad f (scalar a) bs
boxyDyad f a b = f a b

scalarDyad :: MonadError Error m =>
  (ScalarValue -> ScalarValue -> m ScalarValue)
       -> Noun ->        Noun -> m Noun
scalarDyad f a@(Array ash as) b@(Array bsh bs)
  | null ash && null bsh = let ([a'], [b']) = (as, bs) in scalar <$> f' a' b'
  | null ash = let [a'] = as in Array bsh <$> mapM (a' `f'`) bs
  | null bsh = let [b'] = bs in Array ash <$> mapM (`f'` b') as
  | ash == bsh = Array (arrayShape a) <$> zipWithM f' (arrayContents a) (arrayContents b)
  | ash `isPrefixOf` bsh || bsh `isPrefixOf` ash = fromMajorCells <$> zipWithM (scalarDyad f) (majorCells a) (majorCells b)
  | length ash /= length bsh = throwError $ RankError "Mismatched left and right argument ranks"
  | otherwise = throwError $ LengthError "Mismatched left and right argument shapes"
  where
    f' = boxyDyad f
scalarDyad f (Dictionary aks avs) (Dictionary bks bvs)
  = dictionary <$> mapM (\k ->
      if k `elem` aks && k `elem` bks then (k, ) <$> boxyDyad f (fromJust $ lookup k $ zip aks avs) (fromJust $ lookup k $ zip bks bvs)
      else if k `elem` aks then pure (k, fromJust $ lookup k $ zip aks avs)
      else if k `elem` bks then pure (k, fromJust $ lookup k $ zip bks bvs)
      else throwError $ DomainError "???") (nub $ aks ++ bks)
scalarDyad f (Dictionary aks avs) (Array [] [b]) = Dictionary aks <$> mapM (\a' -> boxyDyad f a' b) avs
scalarDyad f (Array [] [a]) (Dictionary bks bvs) = Dictionary bks <$> mapM (\b' -> boxyDyad f a b') bvs
scalarDyad _ _ _ = throwError $ DomainError "Cannot combine dictionary and non-scalar array"

Indexing dictionaries

There are two ways of indexing a dictionary: Index which gets just one key (boxing it) and From which gets an array of keys.

indexElement :: MonadError Error m => ScalarValue -> Noun -> m (Maybe ScalarValue)
indexElement _ (Array _ _) = throwError $ DomainError "Array cannot be element-indexed"
indexElement i (Dictionary ks vs) = pure $ lookup i $ zip ks vs

squad i d@(Dictionary _ _) = indexElement (toScalar i) d >>= (\case
  Just r -> pure $ scalar r
  Nothing -> throwError $ IndexError "Key not found in dictionary")

from x y = ((\x' -> (first `before` squad) x' y) `atRank1` 0) x

Replicating dictionaries

Replicate can take two dictionaries and use the left as a compress array of the keys of the right.

replicateDict :: MonadError Error m => Eq a => [(a, Bool)] -> [(a, b)] -> m [(a, b)]
replicateDict sels xs = pure $ filter (\(k, _) -> fromMaybe False $ lookup k sels) xs

Searching dictionaries

Searching a dictionary searches values. Index Of has no "length" equivalent on dictionaries so throws on key not found.

searchFunction :: MonadError Error m => (Noun -> [Noun] -> m Noun) -> (ScalarValue -> [ScalarValue] -> [ScalarValue] -> m ScalarValue) -> Noun -> Noun -> m Noun
searchFunction f _ ns hs@(Array _ _) = -- ...
searchFunction _ g n (Dictionary ks vs) = fromScalar <$> g (box n) ks vs

elementOf = searchFunction (pure .: scalar .: boolToScalar .: elem) (\e _ v -> pure $ boolToScalar $ e `elem` v)

count = searchFunction (pure .: scalar .: Number .: (:+ 0) .: countEqual) (\e _ v -> pure $ Number $ countEqual e v :+ 0)

indexOf = flip $ searchFunction
  (pure .: scalar .: Number .: (:+ 0) .: (\n hs -> fromMaybe (genericLength hs) $ n `genericElemIndex` hs))
  (\e k v -> case find (\(_, u) -> e == u) (zip k v) of
    Just (i, _) -> pure i
    Nothing -> throwError $ IndexError "Value not found in dictionary")

Constructing dictionaries

There are four functions that construct dictionaries: Key-Value Pair ߹ which makes a singleton dictionary, From Pairs ߹, From Keys and Values , From Inverted Table .

keyValuePair :: MonadError Error m => Noun -> Noun -> m Noun
keyValuePair k v = pure $ dictionary [(box k, box v)]

fromPairs :: MonadError Error m => Noun -> m Noun
fromPairs (Array [_] xs) = do
  let err = DomainError "From Pairs argument must be a 2-column matrix or vector of pairs or dictionaries"
  dictionary . concat <$> mapM (\x -> do
    let x' = fromScalar x
    case x' of
      (Dictionary ks vs) -> pure $ zip ks vs
      (Array [2] [a, b]) -> pure $ [(a, b)]
      _ -> throwError err) xs
fromPairs arr@(Array [_, 2] _) = majorCells' arr >>= fromPairs
fromPairs _ = throwError $ DomainError "From Pairs argument must be a 2-column matrix or vector of pairs or dictionaries"

fromKeysAndValues :: MonadError Error m => [ScalarValue] -> [ScalarValue] -> m Noun
fromKeysAndValues ks vs
  | length ks == length vs = pure $ Dictionary ks vs
  | otherwise = throwError $ LengthError "From Keys and Values arguments must have the same length"

fromKeysAndValues' :: MonadError Error m => Noun -> Noun -> m Noun
fromKeysAndValues' k v = do
  let err = DomainError "From Keys and Values arguments must be vectors"
  ks <- asVector err k
  vs <- asVector err v
  fromKeysAndValues ks vs

fromInvertedTable :: MonadError Error m => Noun -> m Noun
fromInvertedTable (Array [2] [k, v]) = do
  let err = DomainError "From Inverted Table argument must be a pair of vectors"
  ks <- asVector err $ fromScalar k
  vs <- asVector err $ fromScalar v
  fromKeysAndValues ks vs
fromInvertedTable es@(Array [2, _] _) = do
  let err = DomainError "From Inverted Table argument must be a pair of vectors"
  let [k, v] = majorCells es
  ks <- asVector err k
  vs <- asVector err v
  fromKeysAndValues ks vs
fromInvertedTable _ = throwError $ DomainError "From Inverted Table argument must be a pair of vectors"

Deconstructing dictionaries

Transpose and Major Cells have been extended to dictionary arguments respectively as Inverted Table and Key-Value Pairs.

invertedTable :: MonadError Error m => Noun -> m ([ScalarValue], [ScalarValue])
invertedTable (Dictionary ks vs) = pure (ks, vs)
invertedTable _ = throwError $ DomainError "Inverted Table argument must be a dictionary"

majorCells (Dictionary ks vs) = zipWith (\a b -> vector [a, b]) ks vs

What I haven't said

Many more functions support dictionaries: Catenate (right-biased join), set functions, grades, Enlist, Ravel, and probably more. Of course all the new glyphs have been added to the SBCS.