Skip to content

Markdown decoder #191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 228 additions & 3 deletions packages/notus/lib/src/convert/markdown.dart
Original file line number Diff line number Diff line change
@@ -11,13 +11,238 @@ class NotusMarkdownCodec extends Codec<Delta, String> {
const NotusMarkdownCodec();

@override
Converter<String, Delta> get decoder =>
throw UnimplementedError('Decoding is not implemented yet.');
Converter<String, Delta> get decoder => _NotusMarkdownDecoder();

@override
Converter<Delta, String> get encoder => _NotusMarkdownEncoder();
}

/// Converts a Markdown string into a [Delta], one input line at a time.
///
/// Each line is tried, in order, as a block quote, a code fence / list item,
/// and a heading; anything else is treated as a plain paragraph line. The
/// remaining text of a line is then scanned for inline styles (bold/italic)
/// and links.
///
/// Block-level attributes are carried down as `outerStyle` and are applied
/// to every text segment of the line as well as to its trailing newline.
///
/// Known limitations (unchanged from the initial implementation): list
/// nesting and inline code are not supported, and empty lines produce no
/// output.
class _NotusMarkdownDecoder extends Converter<String, Delta> {
  /// Inline attributes indexed by the length of the style delimiter.
  ///
  /// Index 0 is a placeholder and is never read: [_styleRegExp] only matches
  /// delimiters that are 1 to 3 characters long.
  static const List<Map<String, dynamic>> _attributesByStyleLength = [
    null,
    {'i': true}, // _
    {'b': true}, // **
    {'i': true, 'b': true} // **_
  ];

  static final RegExp _headingRegExp = RegExp(r'(#+) *(.+)');

  // NOTE(review): inside a character class `\1` does not act as a
  // back-reference, so `[^\1 ]` in practice only ensures that the styled
  // text does not end with a space. The pattern is intentionally left as is.
  static final RegExp _styleRegExp =
      RegExp(r'((?:\*|_){1,3})(.*?[^\1 ])\1');
  static final RegExp _linkRegExp = RegExp(r'\[([^\]]+)\]\(([^\)]+)\)');
  static final RegExp _ulRegExp = RegExp(r'^( *)\* +(.*)');
  static final RegExp _olRegExp = RegExp(r'^( *)\d+[\.)] +(.*)');
  static final RegExp _bqRegExp = RegExp(r'^> *(.*)');
  static final RegExp _codeRegExp = RegExp(r'^( *)```'); // TODO: inline code

  /// Whether the line currently being processed is inside a ``` code fence.
  bool _inBlockStack = false;

  @override
  Delta convert(String input) {
    // Always start outside of a code fence, so that an unterminated fence in
    // a previous call on this instance cannot leak into this conversion.
    _inBlockStack = false;

    final delta = Delta();
    for (final line in input.split('\n')) {
      _handleLine(line, delta);
    }
    return delta;
  }

  /// Returns a new map holding [base] overlaid with [extra].
  ///
  /// [extra] may be null, in which case a plain copy of [base] is returned.
  /// Entries of [extra] win over entries of [base] with the same key.
  Map<String, dynamic> _mergeAttributes(
      Map<String, dynamic> base, Map<String, dynamic> extra) {
    final merged = Map<String, dynamic>.from(base);
    if (extra != null) {
      merged.addAll(extra);
    }
    return merged;
  }

  /// Dispatches a single [line] to the first handler that accepts it.
  ///
  /// [attributes] are the attributes inherited from an enclosing block (for
  /// example a block quote); they are null for top-level lines.
  void _handleLine(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    // The order matters: quotes are unwrapped first so that their content
    // can itself be a code fence, a list item or a heading.
    if (_handleBlockQuote(line, delta, attributes) ||
        _handleBlock(line, delta, attributes) ||
        _handleHeading(line, delta, attributes)) {
      return;
    }

    // Empty lines are dropped.
    if (line.isNotEmpty) {
      _handleSpan(line, delta, true, attributes);
    }
  }

  /// Handles code fences, code block content and list items.
  ///
  /// Markdown supports headings and blocks within blocks (except for within
  /// code), but not blocks within headings.
  ///
  /// Returns true when [line] was consumed.
  bool _handleBlock(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    if (_codeRegExp.matchAsPrefix(line) != null) {
      // A fence line only toggles the state; it produces no output.
      _inBlockStack = !_inBlockStack;
      return true;
    }

    if (_inBlockStack) {
      // Inside a fence the line is emitted verbatim: no other block or
      // inline syntax is recognised.
      delta.insert(
          line + '\n',
          NotusAttribute.code
              .toJson()); // TODO: replace with?: {'quote': true})
      return true;
    }

    return _handleOrderedList(line, delta, attributes) ||
        _handleUnorderedList(line, delta, attributes);
  }

  /// Handles a `>` block quote line by re-dispatching its content with the
  /// quote attribute added. All blocks are supported within a block quote.
  ///
  /// Returns true when [line] was consumed.
  bool _handleBlockQuote(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    final match = _bqRegExp.matchAsPrefix(line);
    if (match == null) {
      return false;
    }

    final quoteAttributes = _mergeAttributes(
        {'block': 'quote'}, // NotusAttribute.bq.toJson();
        attributes);
    _handleLine(match.group(1), delta, quoteAttributes);
    return true;
  }

  /// Handles an ordered list item (`1. text` or `1) text`).
  ///
  /// Returns true when [line] was consumed.
  bool _handleOrderedList(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    final match = _olRegExp.matchAsPrefix(line);
    if (match == null) {
      return false;
    }

    // TODO: support nesting (group 1 holds the leading indentation).
    final listAttributes =
        _mergeAttributes(NotusAttribute.ol.toJson(), attributes);
    // There's probably no reason why you would have other block types on the
    // same line, so the item text is parsed for inline syntax only.
    _handleSpan(match.group(2), delta, true, listAttributes);
    return true;
  }

  /// Handles an unordered list item (`* text`).
  ///
  /// Returns true when [line] was consumed.
  bool _handleUnorderedList(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    final match = _ulRegExp.matchAsPrefix(line);
    if (match == null) {
      return false;
    }

    // TODO: support nesting (group 1 holds the leading indentation).
    final listAttributes =
        _mergeAttributes(NotusAttribute.ul.toJson(), attributes);
    // There's probably no reason why you would have other block types on the
    // same line, so the item text is parsed for inline syntax only.
    _handleSpan(match.group(2), delta, true, listAttributes);
    return true;
  }

  /// Handles a `#` heading line; the heading level is the number of `#`.
  ///
  /// Returns true when [line] was consumed.
  bool _handleHeading(String line, Delta delta,
      [Map<String, dynamic> attributes]) {
    final match = _headingRegExp.matchAsPrefix(line);
    if (match == null) {
      return false;
    }

    final headingAttributes = _mergeAttributes(
        {
          'heading': match.group(1).length
        }, // NotusAttribute.heading.withValue(level).toJson();
        attributes);
    _handleSpan(match.group(2), delta, true, headingAttributes);
    return true;
  }

  /// Emits [span] into [delta], resolving inline styles first and links
  /// second; whatever is left is inserted as plain text.
  ///
  /// [outerStyle] (may be null) is applied to everything that is emitted.
  /// When [addNewLine] is true a trailing `\n` carrying [outerStyle] is
  /// appended.
  void _handleSpan(String span, Delta delta, bool addNewLine,
      Map<String, dynamic> outerStyle) {
    var rest = span.substring(_handleStyles(span, delta, outerStyle));

    if (rest.isNotEmpty) {
      rest = rest.substring(_handleLinks(rest, delta, outerStyle));
    }

    if (rest.isNotEmpty) {
      delta.insert(addNewLine ? '$rest\n' : rest, outerStyle);
    } else if (addNewLine) {
      delta.insert('\n', outerStyle);
    }
  }

  /// Emits every bold/italic run found in [span], together with the text
  /// preceding each run.
  ///
  /// Returns the index in [span] up to which the text has been consumed; the
  /// caller is responsible for the remainder.
  int _handleStyles(
      String span, Delta delta, Map<String, dynamic> outerStyle) {
    var start = 0;

    for (final match in _styleRegExp.allMatches(span)) {
      // This run lies inside a link that has already been emitted by the
      // link branch below. Processing it again would duplicate its text and
      // move `start` backwards.
      if (match.start < start) {
        continue;
      }

      if (match.start > start) {
        final bracket = match.start - 1;
        if (span[bracket] == '[') {
          // The run opens a link label: let the link handler process the
          // rest of the span (it recurses into the label for the styles).
          delta.insert(span.substring(start, bracket), outerStyle);
          start = bracket +
              _handleLinks(span.substring(bracket), delta, outerStyle);
          continue;
        }
        delta.insert(span.substring(start, match.start), outerStyle);
      }

      final style = _mergeAttributes(
          _attributesByStyleLength[match.group(1).length], outerStyle);
      // Recurse so that nested styles and links inside the run are resolved.
      _handleSpan(match.group(2), delta, false, style);
      start = match.end;
    }

    return start;
  }

  /// Emits every `[text](href)` link found in [span], together with the text
  /// preceding each link.
  ///
  /// Returns the index in [span] up to which the text has been consumed, or
  /// 0 when [span] contains no link.
  int _handleLinks(
      String span, Delta delta, Map<String, dynamic> outerStyle) {
    var start = 0;

    for (final match in _linkRegExp.allMatches(span)) {
      if (match.start > start) {
        // The text in front of a link keeps the surrounding style, exactly
        // like the text in front of a style run does.
        delta.insert(span.substring(start, match.start), outerStyle);
      }

      final linkAttributes = _mergeAttributes(
          {
            'a': match.group(2)
          }, // NotusAttribute.link.fromString(href).toJson();
          outerStyle);
      // Recurse so that styles inside the link label are resolved.
      _handleSpan(match.group(1), delta, false, linkAttributes);
      start = match.end;
    }

    return start;
  }
}

class _NotusMarkdownEncoder extends Converter<Delta, String> {
static const kBold = '**';
static const kItalic = '_';
@@ -142,7 +367,7 @@ class _NotusMarkdownEncoder extends Converter<Delta, String> {
if (padding.isNotEmpty) buffer.write(padding);
}
// Now open any new styles.
for (var value in style.values) {
for (var value in style.values.toList().reversed) {
if (value.scope == NotusAttributeScope.line) continue;
if (currentStyle.containsSame(value)) continue;
final originalText = text;
323 changes: 319 additions & 4 deletions packages/notus/test/convert/markdown_test.dart
Original file line number Diff line number Diff line change
@@ -10,10 +10,325 @@ import 'package:notus/convert.dart';

void main() {
group('$NotusMarkdownCodec.encode', () {
test('unimplemented', () {
expect(() {
notusMarkdown.decode('test');
}, throwsUnimplementedError);
test('paragraphs', () {
final markdown = 'First line\n\nSecond line\n\n';
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'First line\nSecond line\n');
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('italics', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'italics');
expect(delta.elementAt(0).attributes["i"], true);
expect(delta.elementAt(0).attributes["b"], null);
if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor('_italics_\n\n', true);
runFor('*italics*\n\n', false);
});

test('multi-word italics', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'Okay, ');
expect(delta.elementAt(0).attributes, null);

expect(delta.elementAt(1).data, 'this is in italics');
expect(delta.elementAt(1).attributes["i"], true);
expect(delta.elementAt(1).attributes["b"], null);

expect(delta.elementAt(3).data, 'so is all of _ this');
expect(delta.elementAt(3).attributes["i"], true);

expect(delta.elementAt(4).data, ' but this is not\n');
expect(delta.elementAt(4).attributes, null);
if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor(
'Okay, _this is in italics_ and _so is all of _ this_ but this is not\n\n',
true);
runFor(
'Okay, *this is in italics* and *so is all of _ this* but this is not\n\n',
false);
});

test('bold', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'bold');
expect(delta.elementAt(0).attributes["b"], true);
expect(delta.elementAt(0).attributes["i"], null);
if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor('**bold**\n\n', true);
runFor('__bold__\n\n', false);
});

test('multi-word bold', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'Okay, ');
expect(delta.elementAt(0).attributes, null);

expect(delta.elementAt(1).data, 'this is bold');
expect(delta.elementAt(1).attributes["b"], true);
expect(delta.elementAt(1).attributes["i"], null);

expect(delta.elementAt(3).data, 'so is all of __ this');
expect(delta.elementAt(3).attributes["b"], true);

expect(delta.elementAt(4).data, ' but this is not\n');
expect(delta.elementAt(4).attributes, null);
if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor(
'Okay, **this is bold** and **so is all of __ this** but this is not\n\n',
true);
runFor(
'Okay, __this is bold__ and __so is all of __ this__ but this is not\n\n',
false);
});

test('intersecting inline styles', () {
var markdown = 'This **house _is a_ circus**\n\n';
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(1).data, 'house ');
expect(delta.elementAt(1).attributes["b"], true);
expect(delta.elementAt(1).attributes["i"], null);

expect(delta.elementAt(2).data, 'is a');
expect(delta.elementAt(2).attributes["b"], true);
expect(delta.elementAt(2).attributes["i"], true);

expect(delta.elementAt(3).data, ' circus');
expect(delta.elementAt(3).attributes["b"], true);
expect(delta.elementAt(3).attributes["i"], null);

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('bold and italics', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'this is bold and italic');
expect(delta.elementAt(0).attributes["b"], true);
expect(delta.elementAt(0).attributes["i"], true);

expect(delta.elementAt(1).data, '\n');
expect(delta.length, 2);

if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor('**_this is bold and italic_**\n\n', true);
runFor('_**this is bold and italic**_\n\n', true);
runFor('***this is bold and italic***\n\n', false);
runFor('___this is bold and italic___\n\n', false);
});

test('bold and italics combinations', () {
runFor(String markdown, bool testEncode) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'this is bold');
expect(delta.elementAt(0).attributes["b"], true);
expect(delta.elementAt(0).attributes["i"], null);

expect(delta.elementAt(2).data, 'this is in italics');
expect(delta.elementAt(2).attributes["b"], null);
expect(delta.elementAt(2).attributes["i"], true);

expect(delta.elementAt(4).data, 'this is both');
expect(delta.elementAt(4).attributes["b"], true);
expect(delta.elementAt(4).attributes["i"], true);

if (testEncode) {
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}
}

runFor('**this is bold** _this is in italics_ and **_this is both_**\n\n',
true);
runFor('**this is bold** *this is in italics* and ***this is both***\n\n',
false);
runFor('__this is bold__ _this is in italics_ and ___this is both___\n\n',
false);
});

test('link', () {
var markdown = 'This **house** is a [circus](https://github.com)\n\n';
final delta = notusMarkdown.decode(markdown);

expect(delta.elementAt(1).data, 'house');
expect(delta.elementAt(1).attributes["b"], true);
expect(delta.elementAt(1).attributes["a"], null);

expect(delta.elementAt(3).data, 'circus');
expect(delta.elementAt(3).attributes["b"], null);
expect(delta.elementAt(3).attributes["a"], 'https://github.com');

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('style around link', () {
var markdown = 'This **house** is a **[circus](https://github.com)**\n\n';
final delta = notusMarkdown.decode(markdown);

expect(delta.elementAt(1).data, 'house');
expect(delta.elementAt(1).attributes["b"], true);
expect(delta.elementAt(1).attributes["a"], null);

expect(delta.elementAt(3).data, 'circus');
expect(delta.elementAt(3).attributes["b"], true);
expect(delta.elementAt(3).attributes["a"], 'https://github.com');

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('style within link', () {
var markdown = 'This **house** is a [**circus**](https://github.com)\n\n';
final delta = notusMarkdown.decode(markdown);

expect(delta.elementAt(1).data, 'house');
expect(delta.elementAt(1).attributes["b"], true);
expect(delta.elementAt(1).attributes["a"], null);

expect(delta.elementAt(2).data, ' is a ');
expect(delta.elementAt(2).attributes, null);

expect(delta.elementAt(3).data, 'circus');
expect(delta.elementAt(3).attributes["b"], true);
expect(delta.elementAt(3).attributes["a"], 'https://github.com');

expect(delta.elementAt(4).data, '\n');
expect(delta.length, 5);

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('heading styles', () {
runFor(String markdown, int level) {
final delta = notusMarkdown.decode(markdown);
expect(delta.elementAt(0).data, 'This is an H$level\n');
expect(delta.elementAt(0).attributes['heading'], level);
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
}

runFor('# This is an H1\n\n', 1);
runFor('## This is an H2\n\n', 2);
runFor('### This is an H3\n\n', 3);
});

test('ul', () {
var markdown = '* a bullet point\n* another bullet point\n\n';
final delta = notusMarkdown.decode(markdown);

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('ol', () {
var markdown = '1. 1st point\n1. 2nd point\n\n';
final delta = notusMarkdown.decode(markdown);

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

test('simple bq', () {
// var markdown = '> quote\n> > nested\n>#Heading\n>**bold**\n>_italics_\n>* bullet\n>1. 1st point\n>1. 2nd point\n\n';
var markdown =
'> quote\n> # Heading in Quote\n> # **Styled** heading in _block quote_\n> **bold text**\n> _text in italics_\n\n';
final delta = notusMarkdown.decode(markdown);

expect(delta.elementAt(0).data, 'quote\n');
expect(delta.elementAt(0).attributes['block'], 'quote');
expect(delta.elementAt(0).attributes.length, 1);

expect(delta.elementAt(1).data, 'Heading in Quote\n');
expect(delta.elementAt(1).attributes['block'], 'quote');
expect(delta.elementAt(1).attributes['heading'], 1);
expect(delta.elementAt(1).attributes.length, 2);

expect(delta.elementAt(2).data, 'Styled');
expect(delta.elementAt(2).attributes['block'], 'quote');
expect(delta.elementAt(2).attributes['heading'], 1);
expect(delta.elementAt(2).attributes['b'], true);
expect(delta.elementAt(2).attributes.length, 3);

expect(delta.elementAt(3).data, ' heading in ');
expect(delta.elementAt(3).attributes['block'], 'quote');
expect(delta.elementAt(3).attributes['heading'], 1);
expect(delta.elementAt(3).attributes.length, 2);

expect(delta.elementAt(4).data, 'block quote');
expect(delta.elementAt(4).attributes['block'], 'quote');
expect(delta.elementAt(4).attributes['heading'], 1);
expect(delta.elementAt(4).attributes['i'], true);
expect(delta.elementAt(4).attributes.length, 3);

expect(delta.elementAt(6).data, 'bold text');
expect(delta.elementAt(6).attributes['block'], 'quote');
expect(delta.elementAt(6).attributes['b'], true);
expect(delta.elementAt(6).attributes.length, 2);

expect(delta.elementAt(8).data, 'text in italics');
expect(delta.elementAt(8).attributes['block'], 'quote');
expect(delta.elementAt(8).attributes['i'], true);
expect(delta.elementAt(8).attributes.length, 2);

final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});

/*test('nested bq', () {
var markdown = '> > nested\n>* bullet\n>1. 1st point\n>1. 2nd point\n\n';
final delta = notusMarkdown.decode(markdown);
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});
test('code in bq', () {
var markdown = '> ```\n> print("Hello world!")\n> ```\n\n';
final delta = notusMarkdown.decode(markdown);
final andBack = notusMarkdown.encode(delta);
expect(andBack, markdown);
});*/

test('multiple styles', () {
final delta = notusMarkdown.decode(expectedMarkdown);
// expect(delta, doc);
final andBack = notusMarkdown.encode(delta);
expect(andBack, expectedMarkdown);
});
});