Translate Haskell into English Manually, Part II

It's still a pipeline, but now it's syntactically convenient.

c $ b $ a ctx

-- The (|>) operator behaves like a UNIX pipe: the value on the left is handed
-- to the function on the right, so "x |> f |> g" reads left to right.
infixl 9 |>
(|>) :: a -> (a -> b) -> b
(|>) value fn = fn value

c $ b $ a ctx
ctx |> a |> b |> c
do a
   b
   c

f :: State ParseContext ()

"f" is a function that has type "State ParseContext ()", which is to say
that "f" operates within the "State" monad where the "State" monad is
encapsulating a "ParseContext" object.

import Control.Monad.State

-- | The category a token belongs to.  'Symbol' carries the character it
-- stands for.
data TokenType
  = Identifier
  | Qualifier
  | Type
  | Symbol Char
  deriving (Show, Eq)

-- | A token: its category together with the string associated with it.
data Token = Token
  { tokenType  :: TokenType  -- ^ Category of the token.
  , tokenValue :: String     -- ^ String associated with the token.
  } deriving Show

-- | The record carried as the state of the 'State' computations below.
data ParseContext = ParseContext
  { input   :: String   -- ^ The input that has not been parsed yet.
  , output  :: String   -- ^ The output generated so far.
  , currTok :: Token    -- ^ The current token, if defined.
  , stack   :: [Token]  -- ^ A stack of tokens we haven't dealt with yet.
  } deriving Show

-- | Consume the whole input and store it, followed by " is cool!\n", as the
-- output.
--
-- The new state is produced with record-update syntax on the old context, so
-- the fields this action does not touch ('currTok' and 'stack') are carried
-- over instead of being left out of a freshly constructed record.
makeCool :: State ParseContext ()
makeCool = do
  ctx@(ParseContext {input = s}) <- get
  put (ctx {input = "", output = s ++ " is cool!\n"})

-- | Read all of standard input, run 'makeCool' over a context holding it, and
-- print the resulting output.
main :: IO ()
main = do
  s <- getContents
  -- Only the fields makeCool looks at are initialised here.
  let ctx = ParseContext {input = s, output = ""}
  putStrLn (output (execState makeCool ctx))

$ echo -n "Haskell" | runhugs -98 makeCoolState.hs
Haskell is cool!

putStrLn $ output $ makeCool ctx

putStrLn $ output $ execState makeCool $ ctx

execState makeCool $ ctx
ctx
makeCool
-- | A "ParseContext transformer": empty the input and make the output the
-- original input followed by " is cool!".
makeCool :: State ParseContext ()
makeCool = modify coolify
  where
    -- Pure function from the old context to the new one.
    coolify ctx = ctx {input = "", output = input ctx ++ " is cool!"}

"makeCool" is a "ParseContext transformer".  Modify the "ctx" by setting
"input" to "" and setting "output" to the original input plus the string
" is cool!".

import Control.Monad.State

-- | The category a token belongs to.  'Symbol' carries the character it
-- stands for.
data TokenType
  = Identifier
  | Qualifier
  | Type
  | Symbol Char
  deriving (Show, Eq)

-- | A token: its category together with the string associated with it.
data Token = Token
  { tokenType  :: TokenType  -- ^ Category of the token.
  , tokenValue :: String     -- ^ String associated with the token.
  } deriving Show

-- | The record carried as the state of the 'State' computations below.
data ParseContext = ParseContext
  { input   :: String   -- ^ The input that has not been parsed yet.
  , output  :: String   -- ^ The output generated so far.
  , currTok :: Token    -- ^ The current token, if defined.
  , stack   :: [Token]  -- ^ A stack of tokens we haven't dealt with yet.
  } deriving Show

-- Read one token (a single character, as a Symbol) from input, make it the
-- current token and put it on the stack.
--
-- If the input is already empty the context is left unchanged.  The input is
-- inspected with "case" rather than with a refutable pattern on the left of
-- "<-", so the action is total and does not rely on State providing "fail".
getToken :: State ParseContext ()
getToken = do
  ctx <- get
  case input ctx of
    [] -> return ()
    c:input' ->
      let tok = Token {tokenType = Symbol c, tokenValue = [c]} in
        put (ctx {input = input', currTok = tok, stack = tok : stack ctx})

-- This is a completely different way of writing getToken: a pure
-- ParseContext -> ParseContext function lifted into State with modify.
--
-- The input is split by pattern matching instead of head/tail, so an empty
-- input simply leaves the context unchanged instead of raising an error.
getTokenAnotherWay :: State ParseContext ()
getTokenAnotherWay = modify getTokenAnotherWay'
  where
    getTokenAnotherWay' ctx =
      case input ctx of
        [] -> ctx
        c:input' ->
          let tok = Token {tokenType = Symbol c, tokenValue = [c]} in
            ctx {input = input', currTok = tok, stack = tok : stack ctx}

-- Run the token readers back to back: getToken twice, then
-- getTokenAnotherWay twice.  Each action receives the context left behind by
-- the one before it.
parse :: State ParseContext ()
parse =
  getToken >>             -- The repetition is deliberate;
  getToken >>             -- it is only here to illustrate a point.
  getTokenAnotherWay >>
  getTokenAnotherWay

-- | Read all of standard input, run 'parse' over a context holding it, and
-- print the final context.
main :: IO ()
main = do
  s <- getContents
  -- currTok is not initialised here; the token readers run by parse set it.
  let ctx = ParseContext {input = s, output = "", stack = []}
  print (execState parse ctx)

$ echo "Haskell" | runhugs -98 monadPipes.hs
ParseContext {
  input = "ell\n",
  output = "",
  currTok = Token {tokenType = Symbol 'k', tokenValue = "k"},
  stack = [
    Token {tokenType = Symbol 'k', tokenValue = "k"},
    Token {tokenType = Symbol 's', tokenValue = "s"},
    Token {tokenType = Symbol 'a', tokenValue = "a"},
    Token {tokenType = Symbol 'H', tokenValue = "H"}]}

print $ execState parse $ ctx

parse = do
  getToken            -- I would not normally duplicate lines like this.
  getToken            -- I'm just trying to illustrate a point.
  getTokenAnotherWay
  getTokenAnotherWay

{- Translate C type declarations into English.
  This exercise was taken from "Expert C Programming:  Deep C Secrets", p. 84.
   Example: echo -n "int *p;" | runhugs -98 cdecl.hs
-}

import Char
import Control.Monad.State

-- | The category a token belongs to.  'Symbol' carries the character it
-- stands for.
data TokenType
  = Identifier
  | Qualifier
  | Type
  | Symbol Char
  deriving (Show, Eq)

-- | A token: its category together with the string associated with it.
data Token = Token
  { tokenType  :: TokenType  -- ^ Category of the token.
  , tokenValue :: String     -- ^ String associated with the token.
  } deriving Show

-- | The record carried as the state of the 'State' computations below.
data ParseContext = ParseContext
  { input   :: String   -- ^ The input that has not been parsed yet.
  , output  :: String   -- ^ The output generated so far.
  , currTok :: Token    -- ^ The current token, if defined.
  , stack   :: [Token]  -- ^ A stack of tokens we haven't dealt with yet.
  } deriving Show

-- Convenience accessors on a ParseContext:

-- | Category of the current token.
currTokType :: ParseContext -> TokenType
currTokType ctx = tokenType (currTok ctx)

-- | String associated with the current token.
currTokValue :: ParseContext -> String
currTokValue ctx = tokenValue (currTok ctx)

-- | Token on top of the stack.  Built on 'head', so it is an error to
-- evaluate the result when the stack is empty.
stackHead :: ParseContext -> Token
stackHead ctx = head (stack ctx)

-- Append a string to the ParseContext's output.  Plain ++ is used for
-- simplicity.
writeOutput :: String -> State ParseContext ()
writeOutput s = do
  ctx <- get
  put (ctx {output = output ctx ++ s})

-- Pop the stack: discard its top element.  "drop 1" is used instead of
-- "tail" so that popping an empty stack leaves it empty rather than raising
-- an error.
pop :: State ParseContext ()
pop = modify (\ctx -> ctx {stack = drop 1 (stack ctx)})

-- Write the value of the top of the stack and then pop it.  The stack is
-- inspected by pattern matching, so an empty stack is a no-op instead of an
-- error.
popAndWrite :: State ParseContext ()
popAndWrite = do
  stack' <- gets stack
  case stack' of
    [] -> return ()
    top : _ -> do
      writeOutput (tokenValue top)
      pop

-- Classify a string into a Token.
--
-- "const" and "*" are given fixed English values ("read-only" and
-- "pointer to"); any other single non-alphanumeric character becomes a Symbol
-- whose value is that character; every remaining string keeps itself as the
-- value and is categorised as a Qualifier, a Type or an Identifier.
classifyString :: String -> Token
classifyString str =
  case str of
    "const" -> Token Qualifier "read-only"
    "*" -> Token (Symbol '*') "pointer to"
    [ch] | not (isAlphaNum ch) -> Token (Symbol ch) [ch]
    _ -> Token (category str) str
  where
    category "volatile" = Qualifier
    category word
      | word `elem` typeKeywords = Type
      | otherwise = Identifier
    typeKeywords =
      [ "void", "char", "signed", "unsigned", "short", "int", "long"
      , "float", "double", "struct", "union", "enum" ]

-- Read the next token into currTok: skip leading whitespace, lex one token
-- with lexString, and keep the remainder as the new input.
getToken :: State ParseContext ()
getToken = do
  ctx <- get
  let (token, theRest) = lexString (dropWhile isSpace (input ctx))
  put (ctx {currTok = token, input = theRest})

-- Read a token.  Return it and the left-over portion of the string.
--
-- A leading run of alphanumeric characters is taken as one token; otherwise
-- the first character alone forms the token.  No equation covers the empty
-- string.
lexString :: String -> (Token, String)
lexString s@(c:rest)
  | isAlphaNum c =
      let (word, remaining) = span isAlphaNum s
      in (classifyString word, remaining)
  | otherwise = (classifyString [c], rest)

-- Put tokens on the stack until we reach the first identifier, set the output
-- to "<identifier> is ", then read the token that follows the identifier.
readToFirstIdentifier :: State ParseContext ()
readToFirstIdentifier = do
  getToken
  pushUntilIdentifier
  modify announce
  getToken
  where
    -- The current token is the identifier at this point; the output is
    -- replaced, not appended to.
    announce ctx = ctx {output = currTokValue ctx ++ " is "}

-- Keep pushing the current token and reading the next one until the current
-- token is an identifier.
pushUntilIdentifier :: State ParseContext ()
pushUntilIdentifier = do
  tokType <- gets currTokType
  case tokType of
    Identifier -> return ()             -- Leave things as they are.
    _ -> do
      modify (\ctx -> ctx {stack = currTok ctx : stack ctx})
      getToken
      pushUntilIdentifier

-- Deal with arrays.  While the current token is '[', write "array ", an
-- optional "0..N-1 " size, and "of ", then look for a further '['.
--
-- The size is parsed with "reads" over the token's leading digits and
-- decremented with an ordinary subtraction, so a token such as "3x" cannot
-- raise a parse error and no operator section with a negative literal is
-- needed.
dealWithArrays :: State ParseContext ()
dealWithArrays = do
  tokType <- gets currTokType
  case tokType of
    Symbol '[' -> do
      writeOutput "array "
      getToken                          -- Step onto the token after '['.
      writeIfNumber
      getToken                          -- Step past the token we are now on
                                        -- (the ']' in well-formed input).
      writeOutput "of "
      dealWithArrays                    -- Recurse until we get past the ['s.
    _ -> return ()
  where
    writeIfNumber = do                  -- Write the size if we are on a number.
      tokValue <- gets currTokValue
      case tokValue of
        d : _ | isDigit d -> do
          writeSize tokValue
          getToken
        _ -> return ()
    writeSize tokValue =                -- Output the array size as "0..N-1 ".
      case reads (takeWhile isDigit tokValue) :: [(Integer, String)] of
        [(size, _)] -> writeOutput ("0.." ++ show (size - 1) ++ " ")
        _ -> return ()

-- Deal with function arguments: skip tokens up to the ')', read the token
-- after it, and write "function returning ".
dealWithFunctionArgs :: State ParseContext ()
dealWithFunctionArgs = do
  skipToCloseParen
  getToken
  writeOutput "function returning "
  where
    -- Read tokens until the current one is ')'.
    skipToCloseParen = do
      tokType <- gets currTokType
      if tokType == Symbol ')'
        then return ()
        else getToken >> skipToCloseParen

-- Deal with pointers.  While the top of the stack is a '*', pop it, write its
-- value and a separating space.  The stack is inspected by pattern matching,
-- so running out of stacked tokens ends the loop instead of raising an error.
dealWithPointers :: State ParseContext ()
dealWithPointers = do
  stack' <- gets stack
  case stack' of
    top : _ | tokenType top == Symbol '*' -> do
      popAndWrite
      writeOutput " "
      dealWithPointers                  -- Recurse until we get past the *'s.
    _ -> return ()

-- Process tokens that we stacked while reading to identifier.
--
-- A stacked '(' is dropped, the next token is read, and parsing continues
-- with dealWithDeclarator (which comes back here when it is done).  Any other
-- token is written out, and processing then continues with the rest of the
-- stack, so that every stacked token is handled (e.g. both "unsigned" and
-- "int" in "unsigned int x;").  A single space separates a written token from
-- whatever follows it; nothing is appended after the last one.
dealWithStack :: State ParseContext ()
dealWithStack = do
  stack' <- gets stack
  case stack' of
    [] -> return ()
    top : _ ->
      case tokenType top of
        Symbol '(' -> do
          pop
          getToken
          dealWithDeclarator
        _ -> do
          popAndWrite
          remaining <- gets stack
          if null remaining
            then return ()
            else writeOutput " "
          dealWithStack

-- Do all parsing after first identifier: handle an array or function suffix
-- chosen by the current token, then the stacked pointers, then the rest of
-- the stack.
dealWithDeclarator :: State ParseContext ()
dealWithDeclarator = do
  tokType <- gets currTokType
  suffixAction tokType
  dealWithPointers
  dealWithStack
  where
    suffixAction (Symbol '[') = dealWithArrays
    suffixAction (Symbol '(') = dealWithFunctionArgs
    suffixAction _ = return ()          -- Nothing to do for other tokens.

-- Do all parsing: read up to and including the first identifier, then handle
-- the declarator around it.
parse :: State ParseContext ()
parse = readToFirstIdentifier >> dealWithDeclarator

-- Translate a C type declaration into English by running parse over a fresh
-- context and returning the output it produced.
translate :: String -> String
translate s = output finalCtx           -- Change "output" to "show" to debug.
  where
    -- currTok is not initialised here; parse reads a token before using it.
    initialCtx = ParseContext {input = s, output = "", stack = []}
    finalCtx = execState parse initialCtx

-- | Read all of standard input, translate it, and print the result followed
-- by a newline.
main :: IO ()
main = getContents >>= putStrLn . translate

Translate Haskell into English Manually, Part II

Software