Skip to content

Commit 45b2dbd

Browse files
committed
Fixed the missing LineInfo of PlainText in Paragraph.
1 parent 913f0c4 commit 45b2dbd

File tree

5 files changed

+100
-30
lines changed

5 files changed

+100
-30
lines changed

MwParserFromScratch/MwParserFromScratch.csproj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
<PackageLicenseUrl>http://www.apache.org/licenses/LICENSE-2.0</PackageLicenseUrl>
1212
<PackageProjectUrl>https://github.com/CXuesong/MwParserFromScratch</PackageProjectUrl>
1313
<Authors>CXuesong</Authors>
14-
<Version>0.1.3</Version>
14+
<Version>0.1.4</Version>
1515
<PackageTags>MediaWiki Wikitext Parser</PackageTags>
1616
<RepositoryUrl>https://github.com/CXuesong/MwParserFromScratch</RepositoryUrl>
1717
<RepositoryType>Git</RepositoryType>

MwParserFromScratch/Nodes/Node.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ public IEnumerable<Node> EnumDescendants()
211211
/// <summary>
212212
/// The parent node.
213213
/// </summary>
214+
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
214215
internal INodeCollection ParentCollection { get; set; }
215216

216217
/// <summary>
@@ -292,6 +293,9 @@ internal void Detach(Node node)
292293
/// <inheritdoc />
293294
bool IWikitextLineInfo.HasLineInfo() => Annotation<LineInfoAnnotation>() != null;
294295

296+
internal bool HasLineInfo
297+
=> Annotation<LineInfoAnnotation>() != null;
298+
295299
internal void SetLineInfo(int lineNumber, int linePosition, int start, int length)
296300
{
297301
Debug.Assert(lineNumber > 0);
@@ -360,7 +364,7 @@ public string ToPlainText()
360364
/// </summary>
361365
public abstract string ToPlainText(NodePlainTextOptions options);
362366

363-
private class LineInfoAnnotation
367+
internal class LineInfoAnnotation
364368
{
365369
internal readonly int LineNumber;
366370
internal readonly int LinePosition;

MwParserFromScratch/Nodes/Wikitext.cs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Diagnostics;
34
using System.Linq;
45
using System.Text;
56
using System.Text.RegularExpressions;
@@ -103,6 +104,26 @@ public PlainText Append(string text)
103104
return pt;
104105
}
105106

107+
internal PlainText AppendWithLineInfo(string text, int position, int length, int lineNumber, int linePosition)
108+
{
109+
Debug.Assert(text != null);
110+
Debug.Assert(length == text.Length);
111+
var pt = Inlines.LastNode as PlainText;
112+
if (pt == null)
113+
{
114+
Inlines.Add(pt = new PlainText(text));
115+
pt.SetLineInfo(lineNumber, linePosition, position, length);
116+
}
117+
else
118+
{
119+
if (text.Length == 0) return pt; // ExtendLineInfo won't accept (0)
120+
pt.Content += text;
121+
pt.ExtendLineInfo(length);
122+
}
123+
if (length > 0) ExtendLineInfo(length);
124+
return pt;
125+
}
126+
106127
/// <summary>
107128
/// Enumerates the children of this node.
108129
/// </summary>

MwParserFromScratch/WikitextParser.Basic.cs

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ private LineNode ParseLine(LineNode lastLine)
6060
ParseStart(@"\n", false); // We want to set a terminator, so we need to call ParseStart
6161
// LIST_ITEM / HEADING automatically closes the previous PARAGRAPH
6262
var node = ParseListItem() ?? ParseHeading() ?? ParseCompactParagraph(lastLine);
63+
if (lastLine?.Inlines.LastNode is PlainText pt && pt.Content.Length == 0)
64+
{
65+
// This can happen because we appended a PlainText("") at (A) in ParseLineEnd
66+
pt.Remove();
67+
}
6368
if (node != null)
6469
Accept();
6570
else
@@ -91,10 +96,9 @@ private LineNode ParseLineEnd(LineNode lastNode)
9196
// abc\n\s*?\n TERM P[|abc|]PC[||]
9297
// Note that MediaWiki editor will automatically trim the trailing whitespaces,
9398
// leaving a \n after the content. This one \n will be removed when the page is transcluded.
94-
99+
var lastLinePosition = linePosition;
95100
// Here we consume a \n without fallback.
96-
if (ConsumeToken(@"\n") == null)
97-
return null;
101+
if (ConsumeToken(@"\n") == null) return null;
98102
ParseStart();
99103
// Whitespaces between 2 \n, assuming there's a second \n or TERM after trailingWs
100104
var trailingWs = ConsumeToken(@"[\f\r\t\v\x85\p{Z}]+");
@@ -104,50 +108,74 @@ private LineNode ParseLineEnd(LineNode lastNode)
104108
// Already consumed a \n, attempt to consume another \n
105109
if (ConsumeToken(@"\n") != null)
106110
{
107-
// 2 Line breaks received.
108111
// Close the last paragraph.
109-
unclosedParagraph.Append("\n" + trailingWs);
110-
unclosedParagraph.ExtendLineInfo(position - CurrentContext.StartingPosition);
111-
// Note here TERM excludes \n
112+
unclosedParagraph.AppendWithLineInfo("\n" + trailingWs,
113+
// don't forget the position of leading '\n'
114+
CurrentContext.StartingPosition - 1, position - CurrentContext.StartingPosition,
115+
CurrentContext.StartingLineNumber - 1, lastLinePosition);
116+
// 2 Line breaks received.
117+
// Check for the special case. Note here TERM excludes \n
112118
if (NeedsTerminate(Terminator.Get(@"\n")))
113119
{
114120
// This is a special case.
115-
// abc\n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
121+
// abc \n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
122+
// ^ We are here.
116123
// When the function returns, WIKITEXT parsing will stop
117124
// because a TERM will be received.
118125
// We need to correct this.
119126
var anotherparagraph = new Paragraph();
120-
return ParseSuccessful(anotherparagraph);
127+
anotherparagraph.SetLineInfo(lineNumber, linePosition, position, 0);
128+
return ParseSuccessful(anotherparagraph, false);
121129
}
122-
// After the paragraph, more content incoming.
123-
// abc\n trailingWs \n def
130+
// The last paragraph will be closed now.
124131
return ParseSuccessful(EMPTY_LINE_NODE, false);
125132
}
126133
// The attempt to consume the 2nd \n failed.
127-
// We're still after the whitespaces after the 1st \n .
128134
if (NeedsTerminate())
129135
{
130-
// abc \n TERM P[|abc|]
131-
// Still need to close the paragraph.
132-
unclosedParagraph.Append("\n" + trailingWs);
133-
unclosedParagraph.ExtendLineInfo(1 + position - CurrentContext.StartingPosition);
136+
// abc \n trailingWs TERM P[|abc|]
137+
// ^ We are here.
138+
// If we need to terminate, then close the last paragraph.
139+
unclosedParagraph.AppendWithLineInfo("\n" + trailingWs,
140+
// don't forget the position of leading '\n'
141+
CurrentContext.StartingPosition - 1, position - CurrentContext.StartingPosition + 1,
142+
CurrentContext.StartingLineNumber - 1, lastLinePosition);
134143
return ParseSuccessful(EMPTY_LINE_NODE, false);
135144
}
145+
// The last paragraph is still not closed (i.e. compact paragraph).
146+
// (A)
147+
// Note here we have still consumed the first '\n', while the last paragraph has no trailing '\n'.
148+
// For continued PlainText, we will add a '\n' in ParseCompactParagraph.
149+
// Add an empty node so ParseCompactParagraph can add a '\n' with LineInfo.
150+
unclosedParagraph.AppendWithLineInfo("", CurrentContext.StartingPosition - 1, 0,
151+
CurrentContext.StartingLineNumber - 1, lastLinePosition);
152+
// Fallback so we can either continue parsing PlainText,
153+
// or discover the next, for example, Heading, and leave the last paragraph compact.
154+
Fallback();
155+
return EMPTY_LINE_NODE;
136156
}
137157
else
138158
{
139-
// Last node cannot be a closed paragrap.
159+
// Last node cannot be a closed paragraph.
140160
// It can't because ParseLineEnd is invoked immediately after a last node is parsed,
141161
// and only ParseLineEnd can close a paragraph.
142162
Debug.Assert(!(lastNode is Paragraph), "Last node cannot be a closed paragraph.");
143163
// Rather, last node is LINE node of other type (LIST_ITEM/HEADING).
144-
// Remember we've consumed a \n , and the spaces after it in this function.
164+
// Remember we've already consumed a '\n' , and the spaces after it.
165+
// The situation here is just like the "special case" mentioned above.
145166
if (NeedsTerminate(Terminator.Get(@"\n")))
146167
{
147-
// abc \n TERM --> [|abc|] PC[||]
168+
// abc \n WHITE_SPACE TERM --> [|abc|] PC[|WHITE_SPACE|]
169+
// ^ CurCntxt ^ We are here now.
148170
// Note here TERM excludes \n
149171
var anotherparagraph = new Paragraph();
150-
if (trailingWs != null) anotherparagraph.Append(trailingWs);
172+
if (trailingWs != null)
173+
{
174+
var pt = new PlainText(trailingWs);
175+
// Actually the same as what we do in ParseSuccessful.
176+
pt.SetLineInfo(CurrentContext.StartingLineNumber, CurrentContext.StartingLinePosition,
177+
CurrentContext.StartingPosition, position - CurrentContext.StartingPosition);
178+
}
151179
return ParseSuccessful(anotherparagraph);
152180
}
153181
}
@@ -277,21 +305,23 @@ private LineNode ParseCompactParagraph(LineNode lastNode)
277305
if (mergeTo != null && !mergeTo.Compact) mergeTo = null;
278306
// Create a new paragraph, or merge the new line to the last unclosed paragraph.
279307
ParseStart();
280-
mergeTo?.Append("\n");
308+
if (mergeTo != null)
309+
{
310+
var paraTail = (PlainText) mergeTo.Inlines.LastNode;
311+
paraTail.Content += "\n";
312+
paraTail.ExtendLineInfo(1);
313+
mergeTo.ExtendLineInfo(1);
314+
}
281315
var node = mergeTo ?? new Paragraph();
282316
// Allows an empty paragraph/line.
283317
ParseRun(RunParsingMode.Run, node, false);
284-
if (node == mergeTo)
318+
if (mergeTo != null)
285319
{
286320
// Amend the line position
287-
// Don't forget the prepended \n
288-
lastNode.ExtendLineInfo(position - CurrentContext.StartingPosition + 1);
321+
lastNode.ExtendLineInfo(position - CurrentContext.StartingPosition);
289322
return ParseSuccessful(EMPTY_LINE_NODE, false);
290323
}
291-
else
292-
{
293-
return ParseSuccessful(node);
294-
}
324+
return ParseSuccessful(node);
295325
}
296326

297327
/// <summary>
@@ -331,6 +361,7 @@ private bool ParseRun(RunParsingMode mode, InlineContainer container, bool setLi
331361
if (container.Inlines.LastNode is PlainText lastText)
332362
{
333363
lastText.Content += newtext.Content;
364+
lastText.ExtendLineInfo(((IWikitextSpanInfo) newtext).Length);
334365
continue;
335366
}
336367
}

UnitTestProject1/NodeTests.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,22 @@ public void EnumDescendantsTest()
2222
{
2323
var li = (IWikitextLineInfo) node;
2424
var si = (IWikitextSpanInfo) node;
25+
Assert.IsTrue(li.HasLineInfo());
26+
Assert.IsTrue(si.HasSpanInfo);
2527
Trace.WriteLine(
2628
$"{node.GetType().Name}\t({li.LineNumber},{li.LinePosition};{si.Start}+{si.Length})\t[|{node}|]");
29+
if (node is InlineContainer container)
30+
{
31+
int pos = -1;
32+
foreach (IWikitextSpanInfo child in container.Inlines)
33+
{
34+
if (pos >= 0)
35+
{
36+
Assert.AreEqual(pos, child.Start, "LineInfo of Inline sequence is not consequent.");
37+
}
38+
pos = child.Start + child.Length;
39+
}
40+
}
2741
}
2842
var nn = root.Lines.FirstNode.NextNode;
2943
root.Lines.FirstNode.Remove();

0 commit comments

Comments
 (0)