@@ -60,6 +60,11 @@ private LineNode ParseLine(LineNode lastLine)
6060 ParseStart ( @"\n" , false ) ; // We want to set a terminator, so we need to call ParseStart
6161 // LIST_ITEM / HEADING automatically closes the previous PARAGRAPH
6262 var node = ParseListItem ( ) ?? ParseHeading ( ) ?? ParseCompactParagraph ( lastLine ) ;
63+ if ( lastLine ? . Inlines . LastNode is PlainText pt && pt . Content . Length == 0 )
64+ {
65+ // This can happen because we appended a PlainText("") at (A) in ParseLineEnd
66+ pt . Remove ( ) ;
67+ }
6368 if ( node != null )
6469 Accept ( ) ;
6570 else
@@ -91,10 +96,9 @@ private LineNode ParseLineEnd(LineNode lastNode)
9196 // abc\n\s*?\n TERM P[|abc|]PC[||]
9297 // Note that MediaWiki editor will automatically trim the trailing whitespaces,
9398 // leaving a \n after the content. This one \n will be removed when the page is transcluded.
94-
99+ var lastLinePosition = linePosition ;
95100 // Here we consume a \n without fallback.
96- if ( ConsumeToken ( @"\n" ) == null )
97- return null ;
101+ if ( ConsumeToken ( @"\n" ) == null ) return null ;
98102 ParseStart ( ) ;
99103 // Whitespaces between 2 \n, assuming there's a second \n or TERM after trailingWs
100104 var trailingWs = ConsumeToken ( @"[\f\r\t\v\x85\p{Z}]+" ) ;
@@ -104,50 +108,74 @@ private LineNode ParseLineEnd(LineNode lastNode)
104108 // Already consumed a \n, attempt to consume another \n
105109 if ( ConsumeToken ( @"\n" ) != null )
106110 {
107- // 2 Line breaks received.
108111 // Close the last paragraph.
109- unclosedParagraph . Append ( "\n " + trailingWs ) ;
110- unclosedParagraph . ExtendLineInfo ( position - CurrentContext . StartingPosition ) ;
111- // Note here TERM excludes \n
112+ unclosedParagraph . AppendWithLineInfo ( "\n " + trailingWs ,
113+ // don't forget the position of leading '\n'
114+ CurrentContext . StartingPosition - 1 , position - CurrentContext . StartingPosition ,
115+ CurrentContext . StartingLineNumber - 1 , lastLinePosition ) ;
116+ // 2 Line breaks received.
117+ // Check for the special case. Note here TERM excludes \n
112118 if ( NeedsTerminate ( Terminator . Get ( @"\n" ) ) )
113119 {
114120 // This is a special case.
115- // abc\n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
121+ // abc \n trailingWs \n TERM --> P[|abc\ntrailingWs|]PC[||]
122+ // ^ We are here.
116123 // When the function returns, WIKITEXT parsing will stop
117124 // because a TERM will be received.
118125 // We need to correct this.
119126 var anotherparagraph = new Paragraph ( ) ;
120- return ParseSuccessful ( anotherparagraph ) ;
127+ anotherparagraph . SetLineInfo ( lineNumber , linePosition , position , 0 ) ;
128+ return ParseSuccessful ( anotherparagraph , false ) ;
121129 }
122- // After the paragraph, more content incoming.
123- // abc\n trailingWs \n def
130+ // The last paragraph will be closed now.
124131 return ParseSuccessful ( EMPTY_LINE_NODE , false ) ;
125132 }
126133 // The attempt to consume the 2nd \n failed.
127- // We're still after the whitespaces after the 1st \n .
128134 if ( NeedsTerminate ( ) )
129135 {
130- // abc \n TERM P[|abc|]
131- // Still need to close the paragraph.
132- unclosedParagraph . Append ( "\n " + trailingWs ) ;
133- unclosedParagraph . ExtendLineInfo ( 1 + position - CurrentContext . StartingPosition ) ;
136+ // abc \n trailingWs TERM P[|abc|]
137+ // ^ We are here.
138+ // If we need to terminate, then close the last paragraph.
139+ unclosedParagraph . AppendWithLineInfo ( "\n " + trailingWs ,
140+ // don't forget the position of leading '\n'
141+ CurrentContext . StartingPosition - 1 , position - CurrentContext . StartingPosition + 1 ,
142+ CurrentContext . StartingLineNumber - 1 , lastLinePosition ) ;
134143 return ParseSuccessful ( EMPTY_LINE_NODE , false ) ;
135144 }
145+ // The last paragraph is still not closed (i.e. compact paragraph).
146+ // (A)
147+ // Note here we have still consumed the first '\n', while the last paragraph has no trailing '\n'.
148+ // For continued PlainText, we will add a '\n' in ParseCompactParagraph.
149+ // Add an empty node so ParseCompactParagraph can add a '\n' with LineInfo.
150+ unclosedParagraph . AppendWithLineInfo ( "" , CurrentContext . StartingPosition - 1 , 0 ,
151+ CurrentContext . StartingLineNumber - 1 , lastLinePosition ) ;
152+ // Fallback so we can either continue parsing PlainText,
153+ // or discover the next, for example, Heading, and leave the last paragraph compact.
154+ Fallback ( ) ;
155+ return EMPTY_LINE_NODE ;
136156 }
137157 else
138158 {
139- // Last node cannot be a closed paragrap .
159+ // Last node cannot be a closed paragraph .
140160 // It can't because ParseLineEnd is invoked immediately after a last node is parsed,
141161 // and only ParseLineEnd can close a paragraph.
142162 Debug . Assert ( ! ( lastNode is Paragraph ) , "Last node cannot be a closed paragraph." ) ;
143163 // Rather, last node is LINE node of other type (LIST_ITEM/HEADING).
144- // Remember we've consumed a \n , and the spaces after it in this function.
164+ // Remember we've already consumed a '\n' , and the spaces after it.
165+ // The situation here is just like the "special case" mentioned above.
145166 if ( NeedsTerminate ( Terminator . Get ( @"\n" ) ) )
146167 {
147- // abc \n TERM --> [|abc|] PC[||]
168+ // abc \n WHITE_SPACE TERM --> [|abc|] PC[|WHITE_SPACE|]
169+ // ^ CurCntxt ^ We are here now.
148170 // Note here TERM excludes \n
149171 var anotherparagraph = new Paragraph ( ) ;
150- if ( trailingWs != null ) anotherparagraph . Append ( trailingWs ) ;
172+ if ( trailingWs != null )
173+ {
174+ var pt = new PlainText ( trailingWs ) ;
175+ // Actually the same as what we do in ParseSuccessful.
176+ pt . SetLineInfo ( CurrentContext . StartingLineNumber , CurrentContext . StartingLinePosition ,
177+ CurrentContext . StartingPosition , position - CurrentContext . StartingPosition ) ;
178+ }
151179 return ParseSuccessful ( anotherparagraph ) ;
152180 }
153181 }
@@ -277,21 +305,23 @@ private LineNode ParseCompactParagraph(LineNode lastNode)
277305 if ( mergeTo != null && ! mergeTo . Compact ) mergeTo = null ;
278306 // Create a new paragraph, or merge the new line to the last unclosed paragraph.
279307 ParseStart ( ) ;
280- mergeTo ? . Append ( "\n " ) ;
308+ if ( mergeTo != null )
309+ {
310+ var paraTail = ( PlainText ) mergeTo . Inlines . LastNode ;
311+ paraTail . Content += "\n " ;
312+ paraTail . ExtendLineInfo ( 1 ) ;
313+ mergeTo . ExtendLineInfo ( 1 ) ;
314+ }
281315 var node = mergeTo ?? new Paragraph ( ) ;
282316 // Allows an empty paragraph/line.
283317 ParseRun ( RunParsingMode . Run , node , false ) ;
284- if ( node == mergeTo )
318+ if ( mergeTo != null )
285319 {
286320 // Amend the line position
287- // Don't forget the prepended \n
288- lastNode . ExtendLineInfo ( position - CurrentContext . StartingPosition + 1 ) ;
321+ lastNode . ExtendLineInfo ( position - CurrentContext . StartingPosition ) ;
289322 return ParseSuccessful ( EMPTY_LINE_NODE , false ) ;
290323 }
291- else
292- {
293- return ParseSuccessful ( node ) ;
294- }
324+ return ParseSuccessful ( node ) ;
295325 }
296326
297327 /// <summary>
@@ -331,6 +361,7 @@ private bool ParseRun(RunParsingMode mode, InlineContainer container, bool setLi
331361 if ( container . Inlines . LastNode is PlainText lastText )
332362 {
333363 lastText . Content += newtext . Content ;
364+ lastText . ExtendLineInfo ( ( ( IWikitextSpanInfo ) newtext ) . Length ) ;
334365 continue ;
335366 }
336367 }
0 commit comments