-- $Id: ParserFunctions.hs,v 1.10 1998/06/05 21:20:42 leavens Exp leavens $

-- Haskell version of some of Graham Hutton's
-- "Higher-order functions for parsing", from the
-- Journal of Functional Programming, 2(3):323-343, July 1992.
-- Translated into Haskell by Gary T. Leavens.

module ParserFunctions where
infixl 5 $~, /$~, $~/
infixr 5 $->
infixl 0 $|

type Parser a b = [a] -> [(b,[a])]

-- primitive parsers (2.1)
succeed :: b -> Parser a b
succeed v inp = [(v,inp)]

p_fail :: Parser a b
p_fail inp = []

satisfy :: (a -> Bool) -> Parser a a
satisfy p [] = p_fail []
satisfy p (x:xs)
		| p x       = succeed x xs
		| otherwise = p_fail xs

literal :: Eq a => a -> Parser a a
literal s = satisfy (==s)

-- combinators (2.2)

alt :: Parser a b -> Parser a b -> Parser a b
alt p1 p2 inp = p1 inp ++ p2 inp

p_then :: Parser a b -> Parser a c -> Parser a (b,c)
p_then p1 p2 inp = [((v1, v2), out2) | (v1, out1) <- p1 inp,
					   (v2, out2) <- p2 out1]

-- abbreviations for the above
($|) = alt
($~) = p_then

-- manipulating values (2.3)

using :: Parser a b -> (b -> c) -> Parser a c
using p f inp = [(f v, out) | (v, out) <- p inp]

many :: Parser a b -> Parser a [b]
many p = ((p $~ many p) `using` cons) $| (succeed [])

cons = uncurry (:)

some :: Parser a b -> Parser a [b]
some p = (p $~ many p) `using` cons

-- other names for the above

star = many
plus = some

-- I added the following for the common case of lists separated by some token

some_sep_by :: Parser a c -> Parser a b -> Parser a [b]
some_sep_by separator p = (p $~ (many (separator /$~ p)))
                                        `using` cons
many_sep_by :: Parser a c -> Parser a b -> Parser a [b]
many_sep_by separator p = (some_sep_by separator p) $| (succeed [])


-- lexical analysis

number :: Parser Char String
number = some (satisfy digit)
digit x = ('0' <= x) && (x <= '9')  -- could use Haskell's isDigit

word :: Parser Char String
word = some (satisfy letter)
letter x = (('a' <= x) && (x <= 'z')) || (('A' <= x) && (x <= 'Z'))

alphanum :: Parser Char String
alphanum = some (satisfy (\x -> letter x || digit x))

-- I changed "string" to "these_lits" as it wasn't for string literals
these_lits :: Eq a => [a] -> Parser a [a]
these_lits [] = succeed []
these_lits (x:xs) = (literal x $~ these_lits xs) `using` cons

xthen :: Parser a b -> Parser a c -> Parser a c
thenx :: Parser a b -> Parser a c -> Parser a b

p1 `xthen` p2 = (p1 $~ p2) `using` snd
p1 `thenx` p2 = (p1 $~ p2) `using` fst

p_return :: Parser a b -> c -> Parser a c
p `p_return` v = p `using` (const v)
		 where const x y = x

-- abbreviations for the above

(/$~) = xthen
($~/) = thenx
($->) = p_return

-- free-format input (3.1)

nibble :: Parser Char b -> Parser Char b
nibble p = (white /$~ p) $~/ white
	   where white = many (p_any literal " \t\n")

p_any :: (a -> Parser b c) -> [a] -> Parser b c
p_any p = foldr (alt . p) p_fail

symbol :: String -> Parser Char String
symbol = nibble . these_lits


-- syntax analysis (4.5)

opt :: Parser a b -> b -> Parser a b
p `opt` v = p $| (succeed v)

-- result values (5.4)

-- The combinator `into` first parses using p, then sends the result to f,
-- which parses the remaining input. 
-- It acts like bind (i.e., >>=) of a monad.
-- (Note that "succeed" acts like "return" in this monad.)

into :: Parser a b -> (b -> Parser a c) -> Parser a c
p `into` f = (concatMap (uncurry f)) . p

-- equivalently:
-- (p `into` f) inp = g (p inp)
--                    where g ((v,inp'):others) = (f v inp') ++ (g others)
--                          g [] = []
