Skip to content

Commit a617ec4

Browse files
authored
Scan: support 2 format specifiers next to each other (#620)
1 parent 9ffc1c0 commit a617ec4

File tree

2 files changed

+164
-67
lines changed

2 files changed

+164
-67
lines changed

src/FSharpPlus/Parsing.fs

Lines changed: 97 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,69 @@
44

55
[<AutoOpen>]
66
module Parsing =
7-
87
open System
98
open System.Text.RegularExpressions
109
open FSharpPlus
1110
open FSharpPlus.Internals
12-
open FSharpPlus.Internals.Prelude
13-
14-
let inline private getGroups (pf: PrintfFormat<_,_,_,_,_>) s =
15-
let formatters = [|"%A"; "%b"; "%B"; "%c"; "%d"; "%e"; "%E"; "%f"; "%F"; "%g"; "%G"; "%i"; "%M"; "%o"; "%O"; "%s"; "%u"; "%x"; "%X"|]
16-
let formatStr = replace "%%" "%" pf.Value
17-
let constants = split formatters formatStr
18-
let regex = Regex ("^" + String.Join ("(.*?)", constants |> Array.map Regex.Escape) + "$")
19-
let getGroup x =
20-
let groups =
21-
regex.Match(x).Groups
22-
|> Seq.cast<Group>
23-
|> Seq.skip 1
24-
groups
25-
|> Seq.map (fun g -> g.Value)
26-
|> Seq.toArray
27-
(getGroup s, getGroup pf.Value) ||> Array.zipShortest
11+
open Prelude
12+
13+
let inline private getGroups (pf: PrintfFormat<_,_,_,_,_>) =
14+
let format = pf.Value
15+
let regex = System.Text.StringBuilder "^"
16+
let groups = ResizeArray<char>(format.Length / 2) // worst case, there are only format specifiers
17+
let mutable i = 0
18+
while i < String.length format do
19+
match format[i] with
20+
| '%' ->
21+
i <- i + 1
22+
let mutable consumeSpacesAfter = false // consume spaces after if '-' specified
23+
let mutable consumeNumericPlus = false // consume plus before numeric values if '+' specified
24+
while
25+
match format[i] with
26+
| ' ' -> regex.Append @"\s*" |> ignore; true // consume spaces before if ' ' specified
27+
| '-' -> consumeSpacesAfter <- true; true
28+
| '+' -> consumeNumericPlus <- true; true
29+
| '*' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' -> true
30+
| _ -> false
31+
do i <- i + 1
32+
if format[i] <> '%' then groups.Add format[i] // %% does not capture a group
33+
match format[i] with
34+
| 'A' | 'O' -> "(.*?)"
35+
| 'b' -> "([Tt][Rr][Uu][Ee]|[Ff][Aa][Ll][Ss][Ee])"
36+
| 'B' ->
37+
if consumeNumericPlus then regex.Append @"\+?" |> ignore
38+
"([01]+)"
39+
| 'c' -> "(.)"
40+
| 'd' | 'i' ->
41+
regex.Append (if consumeNumericPlus then "([+-]?" else "(-?") |> ignore
42+
"[0-9]+)"
43+
| 'e' | 'E' | 'f' | 'F' | 'g' | 'G' | 'M' ->
44+
regex.Append (if consumeNumericPlus then "([+-]?" else "(-?") |> ignore
45+
@"(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?)"
46+
| 'o' ->
47+
if consumeNumericPlus then regex.Append @"\+?" |> ignore
48+
"([0-7]+)"
49+
| 'u' ->
50+
if consumeNumericPlus then regex.Append @"\+?" |> ignore
51+
"([0-9]+)"
52+
| 's' -> "(.*?)"
53+
| 'x' | 'X' ->
54+
if consumeNumericPlus then regex.Append @"\+?" |> ignore
55+
"([0-9a-fA-F]+)"
56+
| '%' -> "%"
57+
| x -> failwith $"Unknown format specifier: {x}"
58+
|> regex.Append |> ignore
59+
if consumeSpacesAfter then regex.Append @"\s*" else regex
60+
| '\\' | '*' | '+' | '?' | '|' | '{' | '[' | '(' | ')' | '^' | '$' | '.' | '#' | ' ' as escape ->
61+
regex.Append('\\').Append escape
62+
| c -> regex.Append c
63+
|> ignore
64+
i <- i + 1
65+
let regex = regex.Append '$' |> string
66+
fun str ->
67+
let m = Regex.Match(str, regex)
68+
if not m.Success then [||] else
69+
Array.init (m.Groups.Count - 1) <| fun i -> struct(m.Groups[i + 1].Value, groups[i])
2870

2971
let inline private conv (destType: System.Type) (b: int) (s: string) =
3072
match destType with
@@ -38,29 +80,29 @@ module Parsing =
3880
| t when t = typeof<int64> -> Convert.ToInt64 (s, b) |> box
3981
| _ -> invalidOp (sprintf "Type conversion from string to type %A with base %i is not supported" destType b)
4082

41-
let inline private parse (s: string, f: string) : 'r =
83+
let inline private parse struct(s: string, f: char) : 'r =
4284
match f with
43-
| "%B" -> conv typeof<'r> 2 s |> string |> parse
44-
| "%o" -> conv typeof<'r> 8 s |> string |> parse
45-
| "%x" | "%X" -> conv typeof<'r> 16 s |> string |> parse
85+
| 'B' -> conv typeof<'r> 2 s |> string |> parse
86+
| 'o' -> conv typeof<'r> 8 s |> string |> parse
87+
| 'x' | 'X' -> conv typeof<'r> 16 s |> string |> parse
4688
| _ -> parse s
4789

48-
let inline private tryParse (s: string, f: string) : 'r option =
90+
let inline private tryParse struct(s: string, f: char) : 'r option =
4991
match f with
50-
| "%B" -> Option.protect (conv typeof<'r> 2) s |> Option.map string |> Option.bind tryParse
51-
| "%o" -> Option.protect (conv typeof<'r> 8) s |> Option.map string |> Option.bind tryParse
52-
| "%x" | "%X" -> Option.protect (conv typeof<'r> 16) s |> Option.map string |> Option.bind tryParse
92+
| 'B' -> Option.protect (conv typeof<'r> 2) s |> Option.map string |> Option.bind tryParse
93+
| 'o' -> Option.protect (conv typeof<'r> 8) s |> Option.map string |> Option.bind tryParse
94+
| 'x' | 'X' -> Option.protect (conv typeof<'r> 16) s |> Option.map string |> Option.bind tryParse
5395
| _ -> tryParse s
5496

5597
type ParseArray =
56-
static member inline ParseArray (_: 't , _: obj) = fun (g: (string * string) []) -> (parse (g.[0])) : 't
98+
static member inline ParseArray (_: 't , _: obj) = fun (g: struct(string * char) []) -> (parse (g.[0])) : 't
5799

58-
static member inline Invoke (g: (string * string) []) =
100+
static member inline Invoke (g: struct(string * char) []) =
59101
let inline call_2 (a: ^a, b: ^b) = ((^a or ^b) : (static member ParseArray: _*_ -> _) b, a) g
60102
let inline call (a: 'a) = call_2 (a, Unchecked.defaultof<'r>) : 'r
61103
call Unchecked.defaultof<ParseArray>
62104

63-
static member inline ParseArray (t: 't, _: ParseArray) = fun (g: (string * string) []) ->
105+
static member inline ParseArray (t: 't, _: ParseArray) = fun (g: struct(string * char) []) ->
64106
let _f _ = Constraints.whenNestedTuple t : ('t1*'t2*'t3*'t4*'t5*'t6*'t7*'tr)
65107
let (t1: 't1) = parse (g.[0])
66108
let (t2: 't2) = parse (g.[1])
@@ -72,29 +114,29 @@ module Parsing =
72114
let (tr: 'tr) = ParseArray.Invoke (g.[7..])
73115
Tuple<_,_,_,_,_,_,_,_> (t1, t2, t3, t4, t5, t6, t7, tr) |> retype : 't
74116

75-
static member inline ParseArray (_: unit , _: ParseArray) = fun (_: (string * string) []) -> ()
76-
static member inline ParseArray (_: Tuple<'t1> , _: ParseArray) = fun (g: (string * string) []) -> Tuple<_> (parse g.[0]) : Tuple<'t1>
77-
static member inline ParseArray (_: Id<'t1> , _: ParseArray) = fun (g: (string * string) []) -> Id<_> (parse g.[0])
78-
static member inline ParseArray (_: 't1*'t2 , _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1]
79-
static member inline ParseArray (_: 't1*'t2'*'t3 , _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1], parse g.[2]
80-
static member inline ParseArray (_: 't1*'t2'*'t3*'t4 , _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3]
81-
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5 , _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4]
82-
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6 , _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4], parse g.[5]
83-
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6*'t7, _: ParseArray) = fun (g: (string * string) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4], parse g.[5], parse g.[6]
84-
85-
let inline private tryParseElemAt i (g: (string * string) []) =
117+
static member inline ParseArray (_: unit , _: ParseArray) = fun (_: struct(string * char) []) -> ()
118+
static member inline ParseArray (_: Tuple<'t1> , _: ParseArray) = fun (g: struct(string * char) []) -> Tuple<_> (parse g.[0]) : Tuple<'t1>
119+
static member inline ParseArray (_: Id<'t1> , _: ParseArray) = fun (g: struct(string * char) []) -> Id<_> (parse g.[0])
120+
static member inline ParseArray (_: 't1*'t2 , _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1]
121+
static member inline ParseArray (_: 't1*'t2'*'t3 , _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1], parse g.[2]
122+
static member inline ParseArray (_: 't1*'t2'*'t3*'t4 , _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3]
123+
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5 , _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4]
124+
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6 , _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4], parse g.[5]
125+
static member inline ParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6*'t7, _: ParseArray) = fun (g: struct(string * char) []) -> parse g.[0], parse g.[1], parse g.[2], parse g.[3], parse g.[4], parse g.[5], parse g.[6]
126+
127+
let inline private tryParseElemAt i (g: struct(string * char) []) =
86128
if i < Array.length g then tryParse (g.[i])
87129
else None
88130

89131
type TryParseArray =
90-
static member inline TryParseArray (_:'t, _:obj) = fun (g: (string * string) []) -> tryParseElemAt 0 g : 't option
132+
static member inline TryParseArray (_:'t, _:obj) = fun (g: struct(string * char) []) -> tryParseElemAt 0 g : 't option
91133

92-
static member inline Invoke (g: (string * string) []) =
134+
static member inline Invoke (g: struct(string * char) []) =
93135
let inline call_2 (a: ^a, b: ^b) = ((^a or ^b) : (static member TryParseArray: _*_ -> _) b, a) g
94136
let inline call (a: 'a) = call_2 (a, Unchecked.defaultof<'r>) : 'r option
95137
call Unchecked.defaultof<TryParseArray>
96138

97-
static member inline TryParseArray (t: 't, _: TryParseArray) = fun (g: (string * string) []) ->
139+
static member inline TryParseArray (t: 't, _: TryParseArray) = fun (g: struct(string * char) []) ->
98140
let _f _ = Constraints.whenNestedTuple t : ('t1*'t2*'t3*'t4*'t5*'t6*'t7*'tr)
99141
let (t1: 't1 option) = tryParseElemAt 0 g
100142
let (t2: 't2 option) = tryParseElemAt 1 g
@@ -108,31 +150,31 @@ module Parsing =
108150
| Some t1, Some t2, Some t3, Some t4, Some t5, Some t6, Some t7, Some tr -> Some (Tuple<_,_,_,_,_,_,_,_> (t1, t2, t3, t4, t5, t6, t7, tr) |> retype : 't)
109151
| _ -> None
110152

111-
static member inline TryParseArray (_: unit , _: TryParseArray) = fun (_: (string * string) []) -> ()
112-
static member inline TryParseArray (_: Tuple<'t1> , _: TryParseArray) = fun (g: (string * string) []) -> Tuple<_> <!> tryParseElemAt 0 g : Tuple<'t1> option
113-
static member inline TryParseArray (_: Id<'t1> , _: TryParseArray) = fun (g: (string * string) []) -> Id<_> <!> tryParseElemAt 0 g
114-
static member inline TryParseArray (_: 't1*'t2 , _: TryParseArray) = fun (g: (string * string) []) -> tuple2 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g
115-
static member inline TryParseArray (_: 't1*'t2'*'t3 , _: TryParseArray) = fun (g: (string * string) []) -> tuple3 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g
116-
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4 , _: TryParseArray) = fun (g: (string * string) []) -> tuple4 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g
117-
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5 , _: TryParseArray) = fun (g: (string * string) []) -> tuple5 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g
118-
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6 , _: TryParseArray) = fun (g: (string * string) []) -> tuple6 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g <*> tryParseElemAt 5 g
119-
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6*'t7, _: TryParseArray) = fun (g: (string * string) []) -> tuple7 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g <*> tryParseElemAt 5 g <*> tryParseElemAt 6 g
153+
static member inline TryParseArray (_: unit , _: TryParseArray) = fun (_: struct(string * char) []) -> ()
154+
static member inline TryParseArray (_: Tuple<'t1> , _: TryParseArray) = fun (g: struct(string * char) []) -> Tuple<_> <!> tryParseElemAt 0 g : Tuple<'t1> option
155+
static member inline TryParseArray (_: Id<'t1> , _: TryParseArray) = fun (g: struct(string * char) []) -> Id<_> <!> tryParseElemAt 0 g
156+
static member inline TryParseArray (_: 't1*'t2 , _: TryParseArray) = fun (g: struct(string * char) []) -> tuple2 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g
157+
static member inline TryParseArray (_: 't1*'t2'*'t3 , _: TryParseArray) = fun (g: struct(string * char) []) -> tuple3 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g
158+
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4 , _: TryParseArray) = fun (g: struct(string * char) []) -> tuple4 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g
159+
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5 , _: TryParseArray) = fun (g: struct(string * char) []) -> tuple5 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g
160+
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6 , _: TryParseArray) = fun (g: struct(string * char) []) -> tuple6 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g <*> tryParseElemAt 5 g
161+
static member inline TryParseArray (_: 't1*'t2'*'t3*'t4*'t5*'t6*'t7, _: TryParseArray) = fun (g: struct(string * char) []) -> tuple7 <!> tryParseElemAt 0 g <*> tryParseElemAt 1 g <*> tryParseElemAt 2 g <*> tryParseElemAt 3 g <*> tryParseElemAt 4 g <*> tryParseElemAt 5 g <*> tryParseElemAt 6 g
120162

121163

122164
/// Gets a tuple with the result of parsing each element of a string array.
123-
let inline parseArray (source: string []) : '``(T1 * T2 * ... * Tn)`` = ParseArray.Invoke (Array.map (fun x -> (x, "")) source)
165+
let inline parseArray (source: string []) : '``(T1 * T2 * ... * Tn)`` = ParseArray.Invoke (Array.map (fun x -> (x, '\000')) source)
124166

125167
/// Gets a tuple with the result of parsing each element of a formatted text.
126-
let inline sscanf (pf: PrintfFormat<_,_,_,_,'``(T1 * T2 * ... * Tn)``>) s : '``(T1 * T2 * ... * Tn)`` = getGroups pf s |> ParseArray.Invoke
168+
let inline sscanf (pf: PrintfFormat<_,_,_,_,'``(T1 * T2 * ... * Tn)``>) : string -> '``(T1 * T2 * ... * Tn)`` = getGroups pf >> ParseArray.Invoke
127169

128170
/// Reads a line from the Console and gets a tuple with the result of parsing each element of that formatted text.
129171
let inline scanfn pf : '``(T1 * T2 * ... * Tn)`` = sscanf pf (Console.ReadLine ())
130172

131173
/// Gets a tuple with the result of parsing each element of a string array. Returns None in case of failure.
132-
let inline tryParseArray (source: string []) : '``(T1 * T2 * ... * Tn)`` option = TryParseArray.Invoke (Array.map (fun x -> (x, "")) source)
174+
let inline tryParseArray (source: string []) : '``(T1 * T2 * ... * Tn)`` option = TryParseArray.Invoke (Array.map (fun x -> x, '\000') source)
133175

134176
/// Gets a tuple with the result of parsing each element of a formatted text. Returns None in case of failure.
135-
let inline trySscanf (pf: PrintfFormat<_,_,_,_,'``(T1 * T2 * ... * Tn)``>) s : '``(T1 * T2 * ... * Tn)`` option = getGroups pf s |> TryParseArray.Invoke
177+
let inline trySscanf (pf: PrintfFormat<_,_,_,_,'``(T1 * T2 * ... * Tn)``>) : string -> '``(T1 * T2 * ... * Tn)`` option = getGroups pf >> TryParseArray.Invoke
136178

137179
/// Reads a line from the Console and gets a tuple with the result of parsing each element of that formatted text. Returns None in case of failure.
138180
let inline tryScanfn pf : '``(T1 * T2 * ... * Tn)`` option = trySscanf pf (Console.ReadLine ())

0 commit comments

Comments
 (0)