很难不承认 Lean4 真香 —— 然后写了一个简单的 JSON 解析器

在之前的文章中我尝试了一下 Lean4 定理证明器——然后我是在并非干这个方向的所以后面就抛在脑后了

后面因为学习函数式语言于是又捡起了 Lean4 函数式编程,然后……好吧我真的不得不说真香啊

于是我又拿 Lean4 写了一个 JSON parser,说起来我真的好爱写 JSON 解析器啊(?)(毕竟 JSON 是我们平时接触的最实用又最简单的可以拿 LL1 文法表达的东西

首先我们定义一个经典的 JSON 结构。接触了一些 PL 理论以后不难看出这是一个 inductive type 即归纳类型

inductive Json where
  | Number (n : String)
  | Null
  | Bool (b : Bool)
  | String (s : String)
  | Array (a : List Json)
  | Object (o : List (String × Json))
deriving Repr

为了懒得解析我们不妨直接用 String 存放一个数字。现在我们要开始写一个解析器了!首先这些函数是互相递归的所以我们先来一个 mutual 块。 Lean4 里面互相递归的函数得被 mutual 包起来。

mutual

def parseJsonString (str: List Char) : Except String (String × List Char) := do
  sorry

def parseJsonObject (fuel: Nat) (str: List Char) : Except String (List (String × Json) × List Char) := do
  sorry

def parseJsonArray (fuel: Nat) (str: List Char) : Except String (List Json × List Char) := do
  sorry

def parseJsonNumber (str: List Char) : Except String (String × List Char) :=
  sorry

def parseJsonValue (fuel: Nat) (str: List Char): Except String (Json × List Char) := do
  sorry

def parseJson (str: String) : Except String Json := do
  sorry

end

fuel 在这里是最大递归层级,如果 fuel 用光了就强制停止 parse 的过程。为什么我们要引入 fuel 呢?答案很简单:

因为我不会证明它停机

虽然作为一个 JSON 解析器它停机是比较显然的,但是 Lean4 不这样觉得,所以你得向它证明停机。证明这个停机需要一些设计,反正我不会,所以在询问了 GPT 并且得不到正确的答复以后我选择从心:直接丢个最大递归深度进去肯定是停机的,Lean4 也很容易自动证明这一点

(当然你也可以直接使用 partial def 告诉 Lean4 这个函数根本就可能不停机,但是问题不大,何况这个函数明明就是会停机的我们应该坚守原则)

好的那么让我们上代码!

inductive Json where
  | Number (n : String)
  | Null
  | Bool (b : Bool)
  | String (s : String)
  | Array (a : List Json)
  | Object (o : List (String × Json))
deriving Repr

mutual

def trimList (str: List Char) : List Char :=
  str.dropWhile (·.isWhitespace)

def parseJsonString (str: List Char) : Except String (String × List Char) := do
  let mut xs := str
  let mut str : List Char := []
  for ch in xs do
    match ch with
    | '"' => return (.mk str, xs.tail) -- 可以直接写引号但是这个博客的幽默高亮有问题
    | x =>
      str := str ++ [x]
      xs := xs.tail
  return (.mk str, xs)

def parseJsonObject (fuel: Nat) (str: List Char) : Except String (List (String × Json) × List Char) := do
  match fuel with
  | 0 => .error "out of fuel"
  | .succ fuel =>
  let mut xs := trimList str
  let mut ret : List (String × Json) := []
  while true do
    match xs with
    | '"' :: next =>
      let (key, next) ← parseJsonString next
      xs := trimList next
      if xs.head? != some ':' then
        .error "expect `:` but got unexpected token"
      else
        xs := xs.tail
      let (val, next) ← parseJsonValue fuel xs
      ret := ret.append [(key, val)]
      match trimList next with
      | ',' :: next => xs := trimList next
      | '}' :: next =>
        return (ret, next)
      | x => .error $ "expect `,` or `}` but got unexpected token:" ++ String.mk x
    | '}' :: next =>
      return (ret, next)
    | x => .error $ "unexpected token:" ++ String.mk x
  return ([], xs.tail)
termination_by fuel

def parseJsonArray (fuel: Nat) (str: List Char) : Except String (List Json × List Char) := do
  match fuel with
  | 0 => .error "out of fuel"
  | .succ fuel =>
  let mut xs := trimList str
  let mut ret : List Json := []
  while xs.head? != some ']' do
    let (car, next) ← parseJsonValue fuel xs
    ret := ret.append [car]
    match trimList next with
    | ',' :: next => xs := trimList next
    | ']' :: next =>
      return (ret, next)
    | _ => .error "unexpected token"
  return ([], xs.tail)
termination_by fuel

def parseJsonNumber (str: List Char) : Except String (String × List Char) :=
  let (num, xs) := str.span (·.isDigit)
  match num.length with
  | 0 => .error "unexpected token"
  | _ => .ok (.mk num, xs)

def parseJsonValue (fuel: Nat) (str: List Char): Except String (Json × List Char) := do
  match fuel with
  | 0 => .error "out of fuel"
  | .succ fuel =>
  match trimList str with
  | 'n'::'u'::'l'::'l'::xs => .ok (Json.Null, xs)
  | 't'::'r'::'u'::'e'::xs => .ok $ (Json.Bool true, xs)
  | 'f'::'a'::'l'::'s'::'e'::xs => .ok $ (Json.Bool false, xs)
  | '[' :: xs => do
    let (inner, next) ← parseJsonArray fuel xs
    return (Json.Array inner, next)
  | '{' :: xs => do
    let (inner, next) ← parseJsonObject fuel xs
    return (Json.Object inner, next)
  | '"' :: xs => do
    let (str, next) ← parseJsonString xs
    return (Json.String str, next)
  | maybe_number => do
    let (num, next) ← parseJsonNumber maybe_number
    return (Json.Number num, next)
termination_by fuel

def parseJson (str: String) : Except String Json := do
  let (json, next) ← parseJsonValue 999 str.data
  match trimList next with
  | [] => return json
  | x => .error $ "unexpected token after json: ".append x.toString

end

#eval parseJsonNumber "1234abc".data

#eval "123waf32e".data.span (·.isDigit)


def read_and_parse : IO (Except String Json) := do
  let json ← IO.FS.readFile "some.json"
  IO.println json
  return parseJson json

#eval read_and_parse

#eval "true".data

#eval parseJson "true"
#eval parseJson "false"
#eval parseJson "[true, false]"
#eval parseJson "\"hello world\""
#eval parseJson "111"
#eval parseJson "[\"hello world\", true, false, [1,2,3, {\"s\": null}], null, [\"hello\"]]"
#eval parseJson "{\"hello\": \"world\", \"foo\": [1, 2, 3], \"bar\": {\"baz\": true}}"
#eval parseJson "{\"hello\": \"world\", \"foo\": null}"

在这份代码中你可以非常直观地看到 Lean4 的优越之处。作为一个 pure 的 functional language 它为你提供了原生的 for 和 while 语法糖。语法糖的底层细节我们懒得讲解,但这是 Lean4 中一个非常甜的地方——显然没人说 for / while 循环就不是纯函数了,那么为什么不使用甜甜的语法糖呢?

所以你可以直接写

def parseJsonString (str: List Char) : Except String (String × List Char) := do
  let mut xs := str
  let mut str : List Char := []
  for ch in xs do
    match ch with
    | '"' => return (.mk str, xs.tail)
    | x =>
      str := str ++ [x]
      xs := xs.tail
  return (.mk str, xs)

而在另一些不够甜的纯函数式语言中就不得不

fn read_str(str: String) -> Result(ParseStrResult, String) {
  case str |> string.pop_grapheme {
    Error(Nil) -> Error("Unexpected EOF")
    Ok(#(first, rest)) ->
      case first {
        "\"" -> Ok(ParseStrResult("", rest))
        _ ->
          read_str(rest)
          |> result.map(fn(res) {
            ParseStrResult(first <> res.result, res.rest)
          })
      }
  }
}

fn parse_str(str: String) -> Result(ParseStrResult, String) {
  let str = str |> string.trim_start
  case str {
    "\"" <> content -> read_str(content)
    x -> Error("unexpected token " <> x)
  }
}

一点也不直观,蠢蠢的

同时,你可以在这份代码中直观地看到 monads 的魅力。Except 显然也是一个 monad,所以可以使用 do 记号完美完成“不出错则继续,出错则跳出”的逻辑。不得不说,看到

  | '[' :: xs => do
    let (inner, next) ← parseJsonArray fuel xs
    return (Json.Array inner, next)

这样的代码,又直观又好看,这才是函数式语言优美的地方

总而言之,Lean4 真香,uwu

Previous  Next

Loading...