在之前的文章中我尝试了一下 Lean4 定理证明器——然后我是在并非干这个方向的所以后面就抛在脑后了
后面因为学习函数式语言于是又捡起了 Lean4 函数式编程,然后……好吧我真的不得不说真香啊
于是我又拿 Lean4 写了一个 JSON parser,说起来我真的好爱写 JSON 解析器啊(?)(毕竟 JSON 是我们平时接触的最实用又最简单的可以拿 LL1 文法表达的东西
首先我们定义一个经典的 JSON 结构。接触了一些 PL 理论以后不难看出这是一个 inductive type 即归纳类型
inductive Json where
| Number (n : String)
| Null
| Bool (b : Bool)
| String (s : String)
| Array (a : List Json)
| Object (o : List (String × Json))
deriving Repr
为了懒得解析我们不妨直接用 String 存放一个数字。现在我们要开始写一个解析器了!首先这些函数是互相递归的所以我们先来一个 mutual
块。 Lean4 里面互相递归的函数得被 mutual
包起来。
mutual
def parseJsonString (str: List Char) : Except String (String × List Char) := do
sorry
def parseJsonObject (fuel: Nat) (str: List Char) : Except String (List (String × Json) × List Char) := do
sorry
def parseJsonArray (fuel: Nat) (str: List Char) : Except String (List Json × List Char) := do
sorry
def parseJsonNumber (str: List Char) : Except String (String × List Char) :=
sorry
def parseJsonValue (fuel: Nat) (str: List Char): Except String (Json × List Char) := do
sorry
def parseJson (str: String) : Except String Json := do
sorry
end
fuel 在这里是最大递归层级,如果 fuel 用光了就强制停止 parse 的过程。为什么我们要引入 fuel 呢?答案很简单:
因为我不会证明它停机
虽然作为一个 JSON 解析器它停机是比较显然的,但是 Lean4 不这样觉得,所以你得向它证明停机。证明这个停机需要一些设计,反正我不会,所以在询问了 GPT 并且得不到正确的答复以后我选择从心:直接丢个最大递归深度进去肯定是停机的,Lean4 也很容易自动证明这一点
(当然你也可以直接使用 partial def
告诉 Lean4 这个函数根本就可能不停机,但是问题不大,何况这个函数明明就是会停机的我们应该坚守原则)
好的那么让我们上代码!
inductive Json where
| Number (n : String)
| Null
| Bool (b : Bool)
| String (s : String)
| Array (a : List Json)
| Object (o : List (String × Json))
deriving Repr
mutual
def trimList (str: List Char) : List Char :=
str.dropWhile (·.isWhitespace)
def parseJsonString (str: List Char) : Except String (String × List Char) := do
let mut xs := str
let mut str : List Char := []
for ch in xs do
match ch with
| '"' => return (.mk str, xs.tail) -- 可以直接写引号但是这个博客的幽默高亮有问题
| x =>
str := str ++ [x]
xs := xs.tail
return (.mk str, xs)
def parseJsonObject (fuel: Nat) (str: List Char) : Except String (List (String × Json) × List Char) := do
match fuel with
| 0 => .error "out of fuel"
| .succ fuel =>
let mut xs := trimList str
let mut ret : List (String × Json) := []
while true do
match xs with
| '"' :: next =>
let (key, next) ← parseJsonString next
xs := trimList next
if xs.head? != some ':' then
.error "expect `:` but got unexpected token"
else
xs := xs.tail
let (val, next) ← parseJsonValue fuel xs
ret := ret.append [(key, val)]
match trimList next with
| ',' :: next => xs := trimList next
| '}' :: next =>
return (ret, next)
| x => .error $ "expect `,` or `}` but got unexpected token:" ++ String.mk x
| '}' :: next =>
return (ret, next)
| x => .error $ "unexpected token:" ++ String.mk x
return ([], xs.tail)
termination_by fuel
def parseJsonArray (fuel: Nat) (str: List Char) : Except String (List Json × List Char) := do
match fuel with
| 0 => .error "out of fuel"
| .succ fuel =>
let mut xs := trimList str
let mut ret : List Json := []
while xs.head? != some ']' do
let (car, next) ← parseJsonValue fuel xs
ret := ret.append [car]
match trimList next with
| ',' :: next => xs := trimList next
| ']' :: next =>
return (ret, next)
| _ => .error "unexpected token"
return ([], xs.tail)
termination_by fuel
def parseJsonNumber (str: List Char) : Except String (String × List Char) :=
let (num, xs) := str.span (·.isDigit)
match num.length with
| 0 => .error "unexpected token"
| _ => .ok (.mk num, xs)
def parseJsonValue (fuel: Nat) (str: List Char): Except String (Json × List Char) := do
match fuel with
| 0 => .error "out of fuel"
| .succ fuel =>
match trimList str with
| 'n'::'u'::'l'::'l'::xs => .ok (Json.Null, xs)
| 't'::'r'::'u'::'e'::xs => .ok $ (Json.Bool true, xs)
| 'f'::'a'::'l'::'s'::'e'::xs => .ok $ (Json.Bool false, xs)
| '[' :: xs => do
let (inner, next) ← parseJsonArray fuel xs
return (Json.Array inner, next)
| '{' :: xs => do
let (inner, next) ← parseJsonObject fuel xs
return (Json.Object inner, next)
| '"' :: xs => do
let (str, next) ← parseJsonString xs
return (Json.String str, next)
| maybe_number => do
let (num, next) ← parseJsonNumber maybe_number
return (Json.Number num, next)
termination_by fuel
def parseJson (str: String) : Except String Json := do
let (json, next) ← parseJsonValue 999 str.data
match trimList next with
| [] => return json
| x => .error $ "unexpected token after json: ".append x.toString
end
#eval parseJsonNumber "1234abc".data
#eval "123waf32e".data.span (·.isDigit)
def read_and_parse : IO (Except String Json) := do
let json ← IO.FS.readFile "some.json"
IO.println json
return parseJson json
#eval read_and_parse
#eval "true".data
#eval parseJson "true"
#eval parseJson "false"
#eval parseJson "[true, false]"
#eval parseJson "\"hello world\""
#eval parseJson "111"
#eval parseJson "[\"hello world\", true, false, [1,2,3, {\"s\": null}], null, [\"hello\"]]"
#eval parseJson "{\"hello\": \"world\", \"foo\": [1, 2, 3], \"bar\": {\"baz\": true}}"
#eval parseJson "{\"hello\": \"world\", \"foo\": null}"
在这份代码中你可以非常直观地看到 Lean4 的优越之处。作为一个 pure 的 functional language 它为你提供了原生的 for 和 while 语法糖。语法糖的底层细节我们懒得讲解,但这是 Lean4 中一个非常甜的地方——显然没人说 for / while 循环就不是纯函数了,那么为什么不使用甜甜的语法糖呢?
所以你可以直接写
def parseJsonString (str: List Char) : Except String (String × List Char) := do
let mut xs := str
let mut str : List Char := []
for ch in xs do
match ch with
| '"' => return (.mk str, xs.tail)
| x =>
str := str ++ [x]
xs := xs.tail
return (.mk str, xs)
而在另一些不够甜的纯函数式语言中就不得不
fn read_str(str: String) -> Result(ParseStrResult, String) {
case str |> string.pop_grapheme {
Error(Nil) -> Error("Unexpected EOF")
Ok(#(first, rest)) ->
case first {
"\"" -> Ok(ParseStrResult("", rest))
_ ->
read_str(rest)
|> result.map(fn(res) {
ParseStrResult(first <> res.result, res.rest)
})
}
}
}
fn parse_str(str: String) -> Result(ParseStrResult, String) {
let str = str |> string.trim_start
case str {
"\"" <> content -> read_str(content)
x -> Error("unexpected token " <> x)
}
}
一点也不直观,蠢蠢的
同时,你可以在这份代码中直观地看到 monads 的魅力。Except 显然也是一个 monad,所以可以使用 do 记号完美完成“不出错则继续,出错则跳出”的逻辑。不得不说,看到
| '[' :: xs => do
let (inner, next) ← parseJsonArray fuel xs
return (Json.Array inner, next)
这样的代码,又直观又好看,这才是函数式语言优美的地方
总而言之,Lean4 真香,uwu