理解 Monads 來自實踐

本主題適用於中級到高階 F#開發人員

“什麼是 Monads?” 是一個常見的問題。這很容易回答, 但就像 Hitchhikers 指導銀河系一樣,我們意識到我們不理解答案,因為我們不知道我們要求的是什麼。

許多人認為理解 Monads 的方法是練習它們。作為程式設計師,我們通常不關心 Liskov 的替換原則,子型別或子類的數學基礎。通過使用這些想法,我們獲得了他們所代表的直覺。Monads 也是如此。

為了幫助你開始使用 Monads,此示例演示瞭如何構建 Monadic Parser Combinator 庫。這可能有助於你入門,但理解將來自編寫你自己的 Monadic 庫。

足夠的散文,程式碼的時間

解析器型別:

// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)

使用 Parser 的這個定義,我們定義了一些基本的解析器函式

// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)

// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p

// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p

// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p

let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter

satisfy 是一個函式,如果我們沒有通過 EOS 並且當前位置的字元通過 sat 函式,則給定 sat 函式會生成一個成功的解析器。使用 satisfy,我們建立了許多有用的字元解析器。

在 FSI 中執行:

> run digit "";;
val it : char option * int = (null, 0)
> run digit "123";;
val it : char option * int = (Some '1', 1)
> run digit "hello";;
val it : char option * int = (null, 0)

我們有一些基本的解析器。我們將使用解析器組合函式將它們組合成更強大的解析器

// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p

// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)

名稱和簽名不是任意選擇的, 但我們不會深思熟慮,而是讓我們看看我們如何使用 bind 將解析器組合成更復雜的解析器:

> run (bind digit (fun v -> digit)) "123";;
val it : char option * int = (Some '2', 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "123";;
val it : (char * char) option * int = (Some ('1', '2'), 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "1";;
val it : (char * char) option * int = (null, 1)

這告訴我們的是 bind 允許我們將兩個解析器組合成一個更復雜的解析器。由於 bind 的結果是一個解析器,反過來可以再次組合。

> run (bind digit (fun v -> bind digit (fun w -> bind digit (fun u -> return_ (v,w,u))))) "123";;
val it : (char * char * char) option * int = (Some ('1', '2', '3'), 3)

雖然我們將定義輔助函式來簡化語法,但 bind 將是我們組合解析器的基本方法。

可以簡化語法的一個方面是計算表示式 。它們很容易定義:

type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t

// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()

FSI

let p = parser {
          let! v = digit
          let! u = digit
          return v,u
        }
run p "123"
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)

這相當於:

> let p = bind digit (fun v -> bind digit (fun u -> return_ (v,u)))
run p "123";;
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)

我們將要使用的另一個基本的解析器組合器是 orElse

// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp

這允許我們像這樣定義 letterOrDigit

> let letterOrDigit = orElse letter digit;;
val letterOrDigit : Parser<char> = Parser <fun:orElse@70-1>
> run letterOrDigit "123";;
val it : char option * int = (Some '1', 1)
> run letterOrDigit "hello";;
val it : char option * int = (Some 'h', 1)
> run letterOrDigit "!!!";;
val it : char option * int = (null, 0)

關於 Infix 運算子的說明

對 FP 的一個共同關注點是使用不常用的中綴運算子,如 >>=>=><- 等。然而,大多數人並不關心使用+-*/%,這些都是眾所周知的運算子,用於組成值。然而,FP 中的一個重要部分不僅是組成值,還包括函式。對於中級 FP 開發者來說,中綴運算子 >>=>=><- 是眾所周知的,應該具有特定的簽名和語義。

對於我們到目前為止定義的函式,我們將定義以下用於組合解析器的中綴運算子:

let (>>=)   t   uf  = bind t uf
let (<|>)   t   u   = orElse t u

所以 >>= 意味著 bind<|> 意味著 orElse

這允許我們組合解析器更簡潔:

let letterOrDigit = letter <|> digit
let p = digit >>= fun v -> digit >>= fun u -> return_ (v,u)

為了定義一些高階解析器組合器,它們將允許我們解析更復雜的表示式,我們定義了一些更簡單的解析器組合器:

// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)

// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)

// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }

我們已經準備好定義 manysepBy,它們在應用輸入解析器時會更加高階,直到它們失敗。然後 manysepBy 返回聚合結果:

// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []

// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop

建立一個簡單的表示式解析器

使用我們建立的工具,我們現在可以為像 1+2*3 這樣的簡單表示式定義解析器

我們從底部開始,為整數 pint 定義一個解析器

// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }

我們嘗試儘可能多地解析數字,結果是 char list。如果列表為空,我們 fail,否則我們將字元摺疊成整數。

在 FSI 中測試 pint

> run pint "123";;
val it : int option * int = (Some 123, 3)

另外,我們需要解析用於組合整數值的不同型別的運算子:

// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)

FSI:

> run padd "+";;
val it : (int -> int -> int) option * int = (Some <fun:padd@121-1>, 1)

把它們繫結在一起:

// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }

在 FSI 中執行它:

> run pexpr "2+123*2-3";;
val it : int option * int = (Some 245, 9)

結論

通過定義 Parser<'T>return_bind 並確保它們遵守 monadic 定律, 我們構建了一個簡單但強大的 Monadic Parser Combinator 框架。

Monads 和 Parsers 一起使用,因為 Parsers 在解析器狀態下執行。Monads 允許我們在隱藏解析器狀態的同時組合解析器,從而減少混亂並提高可組合性。

我們建立的框架很慢並且不會產生任何錯誤訊息,這是為了保持程式碼簡潔。 FParsec 提供可接受的效能以及出色的錯誤訊息。

然而,僅憑一個例子無法理解 Monads。一個人必須練習 Monads。

以下是你可以嘗試實施的 Monads 的一些示例,以達到你的理解:

  1. State Monad - 允許隱式環境狀態被隱式攜帶
  2. Tracer Monad - 允許隱式攜帶跟蹤狀態。State Monad 的變種
  3. Turtle Monad - 用於建立 Turtle(Logos) 程式的 Monad。State Monad 的變種
  4. 繼續 Monad - 一個協程 Monad。這方面的一個例子是 F#中的 async

要學習的最好方法是在你熟悉的域中提出 Monads 的應用程式。對我來說,這是解析器。

完整原始碼:

// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)

// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)

// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p

// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p

// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p

let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter

// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p

// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)

type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t

// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()

// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp

let (>>=) t uf    = bind t uf
let (<|>) t u     = orElse t u

// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)

// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)

// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }

// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []

// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop

// A simplistic integer expression parser

// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }

// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)

// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }