|
1926 | 1926 | #?@(:cljs [IPrintWithWriter (-pr-writer [this writer opts] (pr-writer-schema this writer opts))])))) |
1927 | 1927 | #?@(:cljs [IPrintWithWriter (-pr-writer [this writer opts] (pr-writer-into-schema this writer opts))])))) |
1928 | 1928 |
|
| 1929 | +(comment |
| 1930 | +;; # Problem |
| 1931 | +;; |
| 1932 | +;; :ref validators don't "tie the knot" by compiling a truly recursive fn. |
| 1933 | +;; |
| 1934 | +;; Instead, they lazily compile each level of recursion as needed and cache the validator for each level |
| 1935 | +;; using a strong reference. |
| 1936 | +;; |
| 1937 | +;; There are two problems with this approach: |
| 1938 | +;; 1. memory consumption is unbounded and dynamically determined based on the depth of the input, |
| 1939 | +;; since we permanently extend the memoization cache to compile each level of ref-recursion. |
| 1940 | +;; 2. runtime performance of the validators themselves is hampered since you can never "fully" |
| 1941 | +;; compile a recursive validator. Concretely, calls to `-validator` will be triggered _during_ validation. |
| 1942 | +;; |
| 1943 | +;; For example, here's a recursive schema. |
| 1944 | +;; |
| 1945 | + |
| 1946 | +(def ConsCell [:schema {:registry {::cons [:maybe [:tuple int? |
| 1947 | + [:ref ::cons]]]}} |
| 1948 | + ::cons]) |
| 1949 | + |
| 1950 | +;; Calling `m/validator` on this does no compilation. |
| 1951 | +;; |
| 1952 | + |
| 1953 | +(m/validator ConsCell) |
| 1954 | + |
| 1955 | +;; |
| 1956 | +;; Instead it calls (-memoize (fn [] (-validator (rf)))), which simply waits for the validator to be called |
| 1957 | +;; before compiling the validator: |
| 1958 | +;; |
| 1959 | +;; ((m/validator ConsCell) [1 nil]) ;; compiles and caches 1 validator |
| 1960 | +;; |
| 1961 | +;; pseudo-code of compiled validator at this point |
| 1962 | + |
| 1963 | +(fn [x] (or (nil? x) |
| 1964 | + (and (vector? x) |
| 1965 | + (= 2 (count x)) |
| 1966 | + (int? (nth x 0)) |
| 1967 | + (let [x (nth x 1)] |
| 1968 | + (or (nil? x) |
| 1969 | + (and (vector? x) |
| 1970 | + (= 2 (count x)) |
| 1971 | + (int? (nth x 0)) |
| 1972 | + ;; lazy compilation |
| 1973 | + ((m/validator ConsCell) (nth x 1)))))))) |
| 1974 | + |
| 1975 | +;; This happens for each new depth of validation |
| 1976 | +;; |
| 1977 | +((m/validator ConsCell) [1 [2 [3 nil]]]) ;; compiles and caches 3 validators |
| 1978 | + |
| 1979 | +;; pseudo-code of compiled validator at this point |
| 1980 | + |
| 1981 | +(fn [x] (or (nil? x) ;; first recursion |
| 1982 | + (and (vector? x) |
| 1983 | + (= 2 (count x)) |
| 1984 | + (int? (nth x 0)) |
| 1985 | + (let [x (nth x 1)] |
| 1986 | + (or (nil? x) ;; second recursion |
| 1987 | + (and (vector? x) |
| 1988 | + (= 2 (count x)) |
| 1989 | + (int? (nth x 0)) |
| 1990 | + (let [x (nth x 1)] |
| 1991 | + (or (nil? x) ;; third recursion |
| 1992 | + (and (vector? x) |
| 1993 | + (= 2 (count x)) |
| 1994 | + (int? (nth x 0)) |
| 1995 | + ;; lazy compilation |
| 1996 | + ((m/validator ConsCell) (nth x 1)))))))))))) |
| 1997 | + |
| 1998 | +;; # Goals |
| 1999 | +;; |
| 2000 | +;; This PR changes how validators are compiled by detecting recursion using |
| 2001 | +;; -identify-ref-schema. This function helps us reliably detect ref cycles even in |
| 2002 | +;; the presence of dynamic scope (malli's scoping approach for refs) by only |
| 2003 | +;; recognizing a ref cycle if the same name _and scope_ are seen. |
| 2004 | +;; |
| 2005 | +;; This puts us on equal footing with Plumatic Schema's validator compilation. |
| 2006 | +;; It is trivial for Schema to detect cycles since they use _global_ scope for |
| 2007 | +;; schemas. If they see the same schema again, they can simply reuse the same |
| 2008 | +;; validator they were already compiling to "tie the knot" into a recursive validation. |
| 2009 | +;; |
| 2010 | +;; The goal and purpose of this PR is for ConsCell to compile its validator up-front and once-and-for-all: |
| 2011 | + |
| 2012 | +(let [rec (volatile! nil) |
| 2013 | + f (fn [x] (or (nil? x) |
| 2014 | + (and (vector? x) |
| 2015 | + (= 2 (count x)) |
| 2016 | + (int? (nth x 0)) |
| 2017 | + (@rec (nth x 1)))))] |
| 2018 | + (vreset! rec f) |
| 2019 | + f) |
| 2020 | + |
| 2021 | +;; This validator does no compilation at runtime and has constant memory usage. |
| 2022 | +;; |
| 2023 | +;; # Implementation |
| 2024 | +;; |
| 2025 | +;; There are two kinds of refs in malli: lazy and "eager" (or just normal). Lazy ref schemas |
| 2026 | +;; realize their child schema lazily, and eager refs know them upfront. The essential difference |
| 2027 | +;; is apparent in their initial validators (see `rf` in the :ref impl): |
| 2028 | + |
| 2029 | +;; lazy refs only realize their child when the validator is called |
| 2030 | +(-memoize (fn [] (schema (mr/-schema (-registry options) ref) options))) |
| 2031 | +;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 2032 | + |
| 2033 | +;; eager refs realize their child when compiling their validators |
| 2034 | +(when-let [s (mr/-schema (-registry options) ref)] (-memoize (fn [] (schema s options)))) |
| 2035 | +;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 2036 | +;; |
| 2037 | +;; Because of this essential difference, validators for eager refs can be fully compiled, but |
| 2038 | +;; lazy refs must defer compilation until their validator is first called. (TODO: section unfinished -- the author notes this can be fixed.) |
| 2039 | +;; |
| 2040 | +;; ## -validator doesn't take opts |
| 2041 | +;; |
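| 2042 | +;; A key constraint: `-validator` takes only the schema (no options), so the in-progress ref validators are passed via the dynamic var `*ref-validators*`. (TODO: confirm intent; this sentence was left unfinished.) |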
| 2043 | +;; |
| 2044 | +;; ## Eager refs |
| 2045 | +;; |
| 2046 | +;; Implementation: |
| 2047 | + |
| 2048 | +(-validator [this] |
| 2049 | + (let [ref-validators *ref-validators* |
| 2050 | + id (-identify-ref-schema this)] |
| 2051 | + (if-some [vol (ref-validators id)] |
| 2052 | + #(@vol %) |
| 2053 | + (let [vol (volatile! nil) |
| 2054 | + s (or (when-let [s (mr/-schema (-registry options) ref)] |
| 2055 | + (schema s options)) |
| 2056 | + (when-not allow-invalid-refs |
| 2057 | + (-fail! ::invalid-ref {:type :ref, :ref ref}))) |
| 2058 | + f (binding [*ref-validators* (assoc ref-validators id vol)] |
| 2059 | + (-validator s))] |
| 2060 | + (vreset! vol f) |
| 2061 | + f)))) |
| 2062 | + |
| 2063 | + |
| 2064 | +) ;; DOC END |
| 2065 | + |
1929 | 2066 | ;; returns an identifier for the :ref schema in the context of its dynamic scope. |
1930 | 2067 | ;; useful for detecting cycles. |
1931 | 2068 | (defn -identify-ref-schema [schema] |
|
0 commit comments