Skip to content

Commit 6458ccd

Browse files
committed
Add mdx_esm test suite.
1 parent 04a52a5 commit 6458ccd

File tree

62 files changed

+3041
-277
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+3041
-277
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
%%%-----------------------------------------------------------------------------
%%% Copyright (c) Meta Platforms, Inc. and affiliates.
%%% Copyright (c) WhatsApp LLC
%%%
%%% This source code is licensed under the MIT license found in the
%%% LICENSE.md file in the root directory of this source tree.
%%%
%%% @author Andrew Bennett <[email protected]>
%%% @copyright (c) Meta Platforms, Inc. and affiliates.
%%% @doc Shared record definitions for MDX (ESM / expression) parsing.
%%%
%%% @end
%%% Created : 10 Oct 2025 by Andrew Bennett <[email protected]>
%%%-----------------------------------------------------------------------------
%%% % @format
%% @oncall whatsapp_clr
-ifndef(MARKDOWN_MDX_HRL).
-define(MARKDOWN_MDX_HRL, 1).

%% Collect info for MDX.
%% `value` is the collected source text; `stops` holds the positional info
%% (see `#markdown_stop{}`) mapping runs of `value` back into the document.
-record(markdown_mdx_collect_result, {
    value :: unicode:unicode_binary(),
    stops :: markdown_vec:t(markdown_stop:t())
}).

%% Signal used as feedback when parsing MDX ESM/expressions.
%% `inner` carries the outcome variant (see markdown_mdx_signal:inner/0).
-record(markdown_mdx_signal, {
    inner :: markdown_mdx_signal:inner()
}).

-endif.

apps/markdown/include/markdown_parser.hrl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,12 @@
13221322
content :: markdown_types:option(markdown_event:content())
13231323
}).
13241324

1325+
%% Relative byte index into a string, to an absolute byte index into the
%% whole document. Pairs of these let a consumer translate an offset inside
%% a collected/concatenated string back to its source position.
-record(markdown_stop, {
    relative :: non_neg_integer(),
    absolute :: non_neg_integer()
}).
1330+
13251331
-record(markdown_subresult, {
13261332
done = false :: boolean(),
13271333
gfm_footnote_definitions = markdown_vec:new() :: markdown_vec:t(unicode:unicode_binary()),

apps/markdown/include/markdown_util.hrl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,6 @@
1717
-ifndef(MARKDOWN_UTIL_HRL).
1818
-define(MARKDOWN_UTIL_HRL, 1).
1919

20-
% Deal with positions in a file.
21-
-record(markdown_util_location, {
22-
indices :: array:array(markdown_unist_point:offset())
23-
}).
24-
2520
-define('format!'(Fmt, Args), markdown_types:unicode_binary(lists:flatten(io_lib:format(Fmt, Args)))).
2621
-define('unreachable!'(Fmt, Args), erlang:error(unreachable, [?'format!'(Fmt, Args)])).
2722
-define('vec!'(List), markdown_vec:from_list(List)).

apps/markdown/src/construct/markdown_construct_mdx_esm.erl

Lines changed: 209 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,21 @@ It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in
4747
-oncall("whatsapp_clr").
4848

4949
-include_lib("markdown/include/markdown_const.hrl").
50+
-include_lib("markdown/include/markdown_mdx.hrl").
5051
-include_lib("markdown/include/markdown_parser.hrl").
5152
-include_lib("markdown/include/markdown_vec.hrl").
5253

54+
-include_lib("stdlib/include/assert.hrl").
55+
5356
%% API
5457
-export([
55-
start/1
58+
start/1,
59+
word/1,
60+
inside/1,
61+
line_start/1,
62+
continuation_start/1,
63+
blank_line_before/1,
64+
at_end/1
5665
]).
5766

5867
%%%=============================================================================
@@ -65,7 +74,7 @@ Start of MDX ESM.
6574
```markdown
6675
> | import a from 'b'
6776
^
68-
\```
77+
```
6978
""".
7079
-spec start(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
7180
start(
@@ -99,3 +108,201 @@ start(
99108
start(Tokenizer = #markdown_tokenizer{}) ->
100109
State = markdown_state:nok(),
101110
{Tokenizer, State}.
111+
112+
-doc """
In keyword.

```markdown
> | import a from 'b'
    ^^^^^^
```
""".
-spec word(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
%% Still inside a run of ASCII lowercase letters: keep consuming the keyword.
word(Tokenizer1 = #markdown_tokenizer{current = {some, Current}}) when Current >= $a andalso Current =< $z ->
    Tokenizer2 = markdown_tokenizer:consume(Tokenizer1),
    State = markdown_state:next(mdx_esm_word),
    {Tokenizer2, State};
%% Keyword ended: check whether it was `export`/`import` followed by a space.
word(
    Tokenizer1 = #markdown_tokenizer{
        current = Current,
        parse_state = #markdown_parse_state{bytes = Bytes},
        point = #markdown_point{offset = Index},
        tokenize_state = TokenizeState1 = #markdown_tokenize_state{start = Start}
    }
) ->
    %% The keyword is the byte slice from where this construct started
    %% (`Start`) up to the current offset.
    Slice = markdown_slice:from_indices(Bytes, Start, Index),
    SliceBytes = markdown_slice:as_binary(Slice),
    case (SliceBytes =:= <<"export">> orelse SliceBytes =:= <<"import">>) andalso Current =:= {some, $\s} of
        true ->
            %% This is ESM: set `concrete` (presumably so the construct cannot
            %% be interrupted — confirm against tokenizer semantics) and repoint
            %% `start` at the index of the most recent event, for later
            %% collection of the ESM body.
            Tokenizer2 = Tokenizer1#markdown_tokenizer{concrete = true},
            EventsLen = markdown_vec:size(Tokenizer2#markdown_tokenizer.events),
            TokenizeState2 = TokenizeState1#markdown_tokenize_state{start = EventsLen - 1},
            Tokenizer3 = Tokenizer2#markdown_tokenizer{tokenize_state = TokenizeState2},
            Tokenizer4 = markdown_tokenizer:consume(Tokenizer3),
            State = markdown_state:next(mdx_esm_inside),
            {Tokenizer4, State};
        false ->
            %% Not ESM: clear the bookkeeping and signal failure.
            TokenizeState2 = TokenizeState1#markdown_tokenize_state{start = 0},
            Tokenizer2 = Tokenizer1#markdown_tokenizer{tokenize_state = TokenizeState2},
            State = markdown_state:nok(),
            {Tokenizer2, State}
    end.
150+
151+
-doc """
In data.

```markdown
> | import a from 'b'
      ^
```
""".
-spec inside(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
%% At eof or eol: close the data token and hand off to the line handler.
inside(Tokenizer = #markdown_tokenizer{current = Current}) when Current =:= none orelse Current =:= {some, $\n} ->
    Exited = markdown_tokenizer:exit(Tokenizer, mdx_esm_data),
    {Exited, markdown_state:retry(mdx_esm_line_start)};
%% Anything else is part of the data: consume and stay in this state.
inside(Tokenizer = #markdown_tokenizer{}) ->
    Consumed = markdown_tokenizer:consume(Tokenizer),
    {Consumed, markdown_state:next(mdx_esm_inside)}.
168+
169+
-doc """
At start of line.

```markdown
  | import a from 'b'
> | export {a}
    ^
```
""".
-spec line_start(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
%% Eof: try to finish the construct.
line_start(Tokenizer = #markdown_tokenizer{current = none}) ->
    {Tokenizer, markdown_state:retry(mdx_esm_at_end)};
%% Eol: peek for a blank line — blank means we may be done, otherwise the
%% ESM continues on the next line.
line_start(Tokenizer = #markdown_tokenizer{current = {some, $\n}}) ->
    Checked = markdown_tokenizer:check(
        Tokenizer,
        markdown_state:next(mdx_esm_at_end),
        markdown_state:next(mdx_esm_continuation_start)
    ),
    {Checked, markdown_state:retry(mdx_esm_blank_line_before)};
%% Anything else: open a data token and start consuming it.
line_start(Tokenizer = #markdown_tokenizer{}) ->
    Entered = markdown_tokenizer:enter(Tokenizer, mdx_esm_data),
    Consumed = markdown_tokenizer:consume(Entered),
    {Consumed, markdown_state:next(mdx_esm_inside)}.
193+
194+
-doc """
At start of line that continues.

```markdown
  | import a from 'b'
> | export {a}
    ^
```
""".
-spec continuation_start(Tokenizer) -> {Tokenizer, State} when
    Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
continuation_start(Tokenizer0 = #markdown_tokenizer{}) ->
    %% Tokenize the line ending itself, then resume at the start of the
    %% next line.
    WithEnter = markdown_tokenizer:enter(Tokenizer0, line_ending),
    WithEol = markdown_tokenizer:consume(WithEnter),
    WithExit = markdown_tokenizer:exit(WithEol, line_ending),
    {WithExit, markdown_state:next(mdx_esm_line_start)}.
211+
212+
-doc """
At start of a potentially blank line.

```markdown
  | import a from 'b'
> | export {a}
    ^
```
""".
-spec blank_line_before(Tokenizer) -> {Tokenizer, State} when
    Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
blank_line_before(Tokenizer0 = #markdown_tokenizer{}) ->
    %% Tokenize the line ending, then defer to the blank-line construct.
    WithEnter = markdown_tokenizer:enter(Tokenizer0, line_ending),
    WithEol = markdown_tokenizer:consume(WithEnter),
    WithExit = markdown_tokenizer:exit(WithEol, line_ending),
    {WithExit, markdown_state:next(blank_line_start)}.
229+
230+
-doc """
At end of line (blank or eof).

```markdown
> | import a from 'b'
                     ^
```
""".
-spec at_end(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
at_end(Tokenizer0 = #markdown_tokenizer{}) ->
    %% Run the user-supplied ESM parser; on success close the construct,
    %% otherwise propagate whatever state parse_esm/1 produced.
    case parse_esm(Tokenizer0) of
        {Parsed, ok} ->
            Exited = markdown_tokenizer:exit(Parsed#markdown_tokenizer{concrete = false}, mdx_esm),
            {Exited, markdown_state:ok()};
        {Parsed, Other} ->
            {Parsed, Other}
    end.
250+
251+
%%%-----------------------------------------------------------------------------
252+
%%% Internal functions
253+
%%%-----------------------------------------------------------------------------
254+
255+
%% @private
-doc """
Parse ESM with a given function.

Collects the ESM source text gathered so far, runs the user-supplied
`mdx_esm_parse` callback on it, and converts the returned signal into a
tokenizer state: `ok`, a positioned error, or (at eof-in-input) either a
positioned error or a retry that continues on the next line.
""".
-spec parse_esm(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
parse_esm(
    Tokenizer1 = #markdown_tokenizer{
        current = Current,
        events = Events,
        parse_state = #markdown_parse_state{
            bytes = Bytes, location = OptionLocation, options = #markdown_parse_options{mdx_esm_parse = {some, Parse}}
        },
        point = Point,
        tokenize_state = #markdown_tokenize_state{start = Start}
    }
) when is_function(Parse, 1) ->
    %% Collect the body of the ESM and positional info for each run of it.
    CollectResult = markdown_mdx_collect:collect(Events, Bytes, Start, [mdx_esm_data, line_ending], []),
    CollectValue = CollectResult#markdown_mdx_collect_result.value,
    CollectStops = CollectResult#markdown_mdx_collect_result.stops,
    %% Parse and handle what was signaled back.
    case Parse(CollectValue) of
        %% Callback accepted the ESM as-is.
        #markdown_mdx_signal{inner = ok} ->
            State = markdown_state:ok(),
            {Tokenizer1, State};
        %% Hard parse error at a relative offset: translate the offset back
        %% to a document point via the collected stops and report it.
        #markdown_mdx_signal{inner = {error, Message, Relative, Source, RuleId}} ->
            %% BEGIN: assertions
            ?assertMatch({some, _}, OptionLocation, "expected location index if aware mdx is on"),
            %% END: assertions
            {some, Location} = OptionLocation,
            OptionRelativePoint = markdown_location:relative_to_point(Location, CollectStops, Relative),
            %% BEGIN: assertions
            ?assertMatch({some, _}, OptionRelativePoint, "expected non-empty string"),
            %% END: assertions
            {some, RelativePoint} = OptionRelativePoint,
            Place = markdown_place:point(RelativePoint),
            State = markdown_state:error(markdown_message:new({some, Place}, Message, Source, RuleId)),
            {Tokenizer1, State};
        %% Callback hit an unexpected end of its input.
        #markdown_mdx_signal{inner = {eof, Message, Source, RuleId}} ->
            case Current of
                %% Truly at end of document: the error is final.
                none ->
                    Place = markdown_place:point(markdown_point:to_unist(Point)),
                    State = markdown_state:error(markdown_message:new({some, Place}, Message, Source, RuleId)),
                    {Tokenizer1, State};
                %% More input remains: stash the error and keep tokenizing on
                %% the next line — a later attempt may succeed.
                {some, _} ->
                    TokenizeState1 = Tokenizer1#markdown_tokenizer.tokenize_state,
                    TokenizeState2 = TokenizeState1#markdown_tokenize_state{
                        mdx_last_parse_error = {some, {Message, Source, RuleId}}
                    },
                    Tokenizer2 = Tokenizer1#markdown_tokenizer{tokenize_state = TokenizeState2},
                    State = markdown_state:retry(mdx_esm_continuation_start),
                    {Tokenizer2, State}
            end
    end.

apps/markdown/src/construct/markdown_construct_mdx_expression_flow.erl

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ See [`mdx_expression`][mdx_expression] for recommendations.
5757
%% API
5858
-export([
5959
start/1,
60-
before/1
60+
before/1,
61+
'after'/1,
62+
'end'/1
6163
]).
6264

6365
%%%=============================================================================
@@ -135,3 +137,74 @@ before(Tokenizer1 = #markdown_tokenizer{current = {some, Current}}) when Current
135137
before(Tokenizer = #markdown_tokenizer{}) ->
136138
State = markdown_state:nok(),
137139
{Tokenizer, State}.
140+
141+
-doc """
After expression.

```markdown
> | {Math.PI}
             ^
```
""".
-spec 'after'(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
%% Whitespace after the expression: attempt to eat it, then finish.
'after'(Tokenizer = #markdown_tokenizer{current = {some, Byte}}) when Byte =:= $\t orelse Byte =:= $\s ->
    Attempted = markdown_tokenizer:attempt(
        Tokenizer,
        markdown_state:next(mdx_expression_flow_end),
        markdown_state:nok()
    ),
    {WithSpace, SpaceOrTabState} = markdown_construct_partial_space_or_tab:space_or_tab(Attempted),
    {WithSpace, markdown_state:retry(SpaceOrTabState)};
%% No trailing whitespace: go straight to the end state.
'after'(Tokenizer = #markdown_tokenizer{}) ->
    {Tokenizer, markdown_state:retry(mdx_expression_flow_end)}.
160+
161+
-doc """
After expression, after optional whitespace.

```markdown
> | {Math.PI}␠␊
              ^
```
""".
-spec 'end'(Tokenizer) -> {Tokenizer, State} when Tokenizer :: markdown_tokenizer:t(), State :: markdown_state:t().
%% Eof or eol: the flow expression is complete.
'end'(Tokenizer1 = #markdown_tokenizer{current = Current}) when Current =:= none orelse Current =:= {some, $\n} ->
    Tokenizer2 = reset(Tokenizer1),
    State = markdown_state:ok(),
    {Tokenizer2, State};
%% `<` with mdx_jsx_flow enabled: a JSX tag may follow on the same line.
'end'(
    Tokenizer1 = #markdown_tokenizer{
        current = {some, $<},
        parse_state = #markdown_parse_state{
            options = #markdown_parse_options{constructs = #markdown_construct_options{mdx_jsx_flow = true}}
        },
        tokenize_state = TokenizeState1
    }
) ->
    %% Tag.
    %% We can't just say: fine.
    %% Lines of blocks have to be parsed until an eol/eof.
    TokenizeState2 = TokenizeState1#markdown_tokenize_state{token_1 = mdx_jsx_flow_tag},
    Tokenizer2 = Tokenizer1#markdown_tokenizer{tokenize_state = TokenizeState2},
    OkState = markdown_state:next(mdx_jsx_flow_after),
    NokState = markdown_state:next(mdx_jsx_flow_nok),
    Tokenizer3 = markdown_tokenizer:attempt(Tokenizer2, OkState, NokState),
    State = markdown_state:retry(mdx_jsx_start),
    {Tokenizer3, State};
%% Anything else after the expression on this line: not a flow expression.
'end'(Tokenizer1 = #markdown_tokenizer{}) ->
    Tokenizer2 = reset(Tokenizer1),
    State = markdown_state:nok(),
    {Tokenizer2, State}.
197+
198+
%%%-----------------------------------------------------------------------------
199+
%%% Internal functions
200+
%%%-----------------------------------------------------------------------------
201+
202+
%% @private
-doc """
Reset state.
""".
-spec reset(Tokenizer) -> Tokenizer when Tokenizer :: markdown_tokenizer:t().
reset(Tokenizer = #markdown_tokenizer{tokenize_state = TokenizeState}) ->
    %% Drop concreteness and restore the default token kind.
    Tokenizer#markdown_tokenizer{
        concrete = false,
        tokenize_state = TokenizeState#markdown_tokenize_state{token_1 = data}
    }.

0 commit comments

Comments (0)