From c275fd0ce18f338a19c3e12d522275501d70cb90 Mon Sep 17 00:00:00 2001 From: Jonathan Bernard Date: Fri, 26 Aug 2011 15:40:56 -0500 Subject: [PATCH] Implmented actions to build AST. * Rewrote grammar slightly. * Added parboiled parse section to JLPMain. * Added code to build the AST while parsing. * Created ASTNode classes. --- doc/grammar.rst | 30 ++-- resources/main/test.groovy | 2 +- src/main/com/jdblabs/jlp/JLPMain.groovy | 19 +- src/main/com/jdblabs/jlp/JLPPegParser.java | 166 +++++++++++++++--- .../jdblabs/jlp/JLPPegParser.java.noactions | 133 ++++++++++++++ src/main/com/jdblabs/jlp/ParserActions.groovy | 7 + src/main/com/jdblabs/jlp/ast/ASTNode.groovy | 5 + src/main/com/jdblabs/jlp/ast/Directive.groovy | 26 +++ src/main/com/jdblabs/jlp/ast/TextBlock.groovy | 25 +++ 9 files changed, 368 insertions(+), 45 deletions(-) create mode 100644 src/main/com/jdblabs/jlp/JLPPegParser.java.noactions create mode 100644 src/main/com/jdblabs/jlp/ParserActions.groovy create mode 100644 src/main/com/jdblabs/jlp/ast/ASTNode.groovy create mode 100644 src/main/com/jdblabs/jlp/ast/Directive.groovy create mode 100644 src/main/com/jdblabs/jlp/ast/TextBlock.groovy diff --git a/doc/grammar.rst b/doc/grammar.rst index 1e99e0d..9bc3366 100644 --- a/doc/grammar.rst +++ b/doc/grammar.rst @@ -1,31 +1,27 @@ -CodePage -> (CodeBlock | DocBlock)* +CodePage -> DocBlock / CodeBlock -// lookahead 2 needed here -DocBlock -> (DirectiveBlock | MarkdownBlock)+ +DocBlock -> DirectiveBlock / MarkdownBlock -DirectiveBlock -> - "author" RemainingLine EOL MarkdownBlock? | - "doc" RemainingLine EOL MarkdownBlock? | - "example" RemainingLine EOL MarkdownBlock? | - "org" OrgString EOL +Code Block -> !DOC_START RemainingLine + +DirectiveBlock -> DOC_START DIRECTIVE_START (LongDirective / LineDirective) MarkdownBlock -> MarkdownLine+ -MarkdownLine -> - NOT_DIRECTIVE_START RemainingLine +LongDirective -> + (AUTHOR_DIR / DOC_DIR / EXAMPLE_DIR) RemainingLine MarkdownBlock? -RemainingLine -> NOT_EOL* +LineDirective -> ORG_DIR RemainingLine -OrgString -> - ( )* ? +MarkdownLine -> DOC_START !DIRECTIVE_START RemainingLine + +RemainingLine -> (!EOL)+, EOL Tokens ------ DOC_START -> "%% " EOL -> "\n" -NOT_EOL -> ~"\n" DIRECTIVE_START -> "@" -NOT_DIRECTIVE_START -> ~"@" -SLASH -> "/" -ORG_ID -> ~"[/\n]" + + diff --git a/resources/main/test.groovy b/resources/main/test.groovy index 9e9b26d..8d39529 100644 --- a/resources/main/test.groovy +++ b/resources/main/test.groovy @@ -4,7 +4,7 @@ import org.parboiled.parserunners.ReportingParseRunner import org.parboiled.parserunners.RecoveringParseRunner parser = Parboiled.createParser(JLPPegParser.class) -parseRunner = new RecoveringParseRunner(parser.CodePage()) +parseRunner = new RecoveringParseRunner(parser.SourceFile()) testLine = """%% This the first test line. %% Second Line diff --git a/src/main/com/jdblabs/jlp/JLPMain.groovy b/src/main/com/jdblabs/jlp/JLPMain.groovy index 9a17f9e..daa0907 100644 --- a/src/main/com/jdblabs/jlp/JLPMain.groovy +++ b/src/main/com/jdblabs/jlp/JLPMain.groovy @@ -1,7 +1,12 @@ package com.jdblabs.jlp +import org.parboiled.Parboiled +import org.parboiled.parserunners.ReportingParseRunner + public class JLPMain { + private JLPPegParser parser + public static void main(String[] args) { JLPMain inst = new JLPMain() @@ -21,20 +26,26 @@ public class JLPMain { cli.usage() return } - Map documentContext = [ docs: [:] ] - // get files passed in def filenames = opts.getArgs() def files = filenames.collect { new File(it) } // -------- parse input -------- // - files.inject(documentContext) { docContext, file -> + Map parsed = files.inject([:]) { docContext, file -> inst.parse(new File(file), docContext) } // -------- generate output -------- // } - public void parse(File inputFile, Map docCtx) { + public JLPMain() { + parser = Parboiled.createParser(JLPPegParser.class) + } + + public Map parse(File inputFile, Map docCtx) { + def parseRunner = new ReportingParseRunner(parser.SourceFile()) + + // parse the file + def firstPass = parseRunner.run(inputFile) } } diff --git a/src/main/com/jdblabs/jlp/JLPPegParser.java b/src/main/com/jdblabs/jlp/JLPPegParser.java index bf4a7fa..3b402bf 100644 --- a/src/main/com/jdblabs/jlp/JLPPegParser.java +++ b/src/main/com/jdblabs/jlp/JLPPegParser.java @@ -1,51 +1,154 @@ package com.jdblabs.jlp; +import com.jdblabs.jlp.ast.*; +import java.util.ArrayList; +import java.util.List; import org.parboiled.Action; import org.parboiled.BaseParser; import org.parboiled.Context; import org.parboiled.Rule; import org.parboiled.annotations.*; +import static com.jdblabs.jlp.ast.TextBlock.makeCodeBlock; +import static com.jdblabs.jlp.ast.TextBlock.makeMarkdownBlock; + +@BuildParseTree public class JLPPegParser extends BaseParser { - public Rule CodePage() { - return ZeroOrMore(FirstOf( - DocBlock(), - CodeBlock())); } + int curLineNum = 1; + public Rule SourceFile() { + return Sequence( + clearLineCount(), + push(new ArrayList()), + ZeroOrMore(Sequence( + FirstOf( + DocBlock(), + CodeBlock()), + push(addToList(pop(), (List)pop()))))); } + + /** + * Parses the rule: + * DocBlock = DirectiveBlock / MarkdownBlock + * + * Pushes a DocBlock object onto the stack. + */ Rule DocBlock() { - return OneOrMore(FirstOf( - DirectiveBlock(), - MarkdownBlock())); } + return Sequence( + push(new ArrayList()), + OneOrMore(Sequence( + FirstOf( + DirectiveBlock(), + MarkdownBlock()), + // stack is now: [List, BlockValue *top*] + // pop the Block, then List, pass to helper to add the + // Block to the list, then push the List back on + push(addToList((ASTNode)pop(), (List)pop()))))); } + + /** + * Parses the rule: + * CodeBlock = !DOC_START RemainingLine + * + * Pushes a CodeBlock onto the stack. + */ Rule CodeBlock() { - return OneOrMore(Sequence( - TestNot(DOC_START), RemainingLine())); } + return Sequence( + push(curLineNum), + TestNot(DOC_START), RemainingLine(), push(match()), + ZeroOrMore(Sequence( + TestNot(DOC_START), RemainingLine(), + push(popAsString() + match()))), + + push(makeCodeBlock(popAsString(), popAsInt()))); } + /** + * Parses the rule: + * DirectiveBlock = + * DOC_START DIRECTIVE_START (LongDirective / LineDirective) + * + * Pushes a Directive onto the stack. + */ Rule DirectiveBlock() { - return FirstOf( + return Sequence( + DOC_START, DIRECTIVE_START, + FirstOf(LongDirective(), LineDirective())); } - // there is a bug in parboiled that prevents sequences of greater - // than 2, so this ia workaround - Sequence(DOC_START, DIRECTIVE_START, LongDirective(), - RemainingLine(), Optional(MarkdownBlock())), + /** + * Parses the rule: + * LongDirective = + * (AUTHOR_DIR / DOC_DIR / EXAMPLE_DIR) RemainingLine MarkdownBlock? + * + * Pushes a Directive object onto the value stack. + */ + Rule LongDirective() { + return Sequence( + push(curLineNum), + FirstOf(AUTHOR_DIR, DOC_DIR, EXAMPLE_DIR), push(match()), + RemainingLine(), push(match()), + Optional(Sequence( + MarkdownBlock(), // pushes block + swap(), + push(popAsString() + ((TextBlock) pop()).value))), + + // pull off the value, type and create the directive + push(new Directive(popAsString(), popAsString(), popAsInt()))); } - Sequence(DOC_START, DIRECTIVE_START, LineDirective(), - RemainingLine())); } + /** + * Parses the rule: + * LineDirective = + * ORG_DIR RemainingLine + * + * Pushes a Directive object onto the value stack. + */ + Rule LineDirective() { + return Sequence( + push(curLineNum), + ORG_DIR, push(match()), + RemainingLine(), - Rule LongDirective() { return FirstOf(AUTHOR_DIR, DOC_DIR, EXAMPLE_DIR); } + // pull off the value, type and create the directive + push(new Directive(match().trim(), popAsString(), popAsInt()))); } - Rule LineDirective() { return ORG_DIR; } - - Rule MarkdownBlock() { return OneOrMore(MarkdownLine()); } + /** + * Parses the rule: + * MarkdownBlock = MarkdownLine+ + * + * Pushes a MarkdownBlock onto the stack as a string. + */ + Rule MarkdownBlock() { + return Sequence( + push(curLineNum), + MarkdownLine(), // pushes the value onto the stack + ZeroOrMore(Sequence( + MarkdownLine(), + swap(), + push(popAsString() + popAsString()))), + + push(makeMarkdownBlock(popAsString(), popAsInt()))); } + /** + * Parses the rule: + * MarkdownLine = + * DOC_START !DIRECTIVE_START RemainingLine + * + * Pushes the line value (not including the DOC_START) onto the stack. + */ Rule MarkdownLine() { - return Sequence(DOC_START, TestNot(DIRECTIVE_START), RemainingLine()); } + return Sequence( + DOC_START, TestNot(DIRECTIVE_START), + RemainingLine(), push(match())); } - Rule RemainingLine() { return Sequence(OneOrMore(NOT_EOL), EOL); } + /** + * Parses the rule: + * RemainingLine = (!EOL)+ EOL + */ + @SuppressSubnodes + Rule RemainingLine() { + return Sequence(OneOrMore(NOT_EOL), EOL, incLineCount()); } Rule DOC_START = String("%% "); - Rule EOL = Ch('\n'); + Rule EOL = FirstOf(Ch('\n'), EOI); Rule NOT_EOL = Sequence(TestNot(EOL), ANY); Rule DIRECTIVE_START= Ch('@'); Rule SLASH = Ch('/'); @@ -55,4 +158,21 @@ public class JLPPegParser extends BaseParser { Rule DOC_DIR = IgnoreCase("doc"); Rule EXAMPLE_DIR = IgnoreCase("example"); Rule ORG_DIR = IgnoreCase("org"); + + String popAsString() { return (String) pop(); } + + Integer popAsInt() { return (Integer) pop(); } + + static List addToList(T value, List list) { + list.add(value); + return list; } + + boolean printValueStack() { + for (int i = 0; i < getContext().getValueStack().size(); i++) { + System.out.println(i + ": " + peek(i)); } + return true; } + + boolean clearLineCount() { curLineNum = 1; return true; } + + boolean incLineCount() { curLineNum++; return true; } } diff --git a/src/main/com/jdblabs/jlp/JLPPegParser.java.noactions b/src/main/com/jdblabs/jlp/JLPPegParser.java.noactions new file mode 100644 index 0000000..2d14c1a --- /dev/null +++ b/src/main/com/jdblabs/jlp/JLPPegParser.java.noactions @@ -0,0 +1,133 @@ +package com.jdblabs.jlp; + +import com.jdblabs.jlp.ast.*; +import java.util.ArrayList; +import java.util.List; +import org.parboiled.Action; +import org.parboiled.BaseParser; +import org.parboiled.Context; +import org.parboiled.Rule; +import org.parboiled.annotations.*; + +import static com.jdblabs.jlp.ast.TextBlock.makeCodeBlock; +import static com.jdblabs.jlp.ast.TextBlock.makeMarkdownBlock; + +@BuildParseTree +public class JLPPegParser extends BaseParser { + + public Rule CodePage() { + return ZeroOrMore(FirstOf( + DocBlock(), + CodeBlock())); } + + /** + * Parses the rule: + * DocBlock = DirectiveBlock / MarkdownBlock + * + * Pushes a DocBlock object onto the stack. + */ + Rule DocBlock() { + return OneOrMore(FirstOf( + DirectiveBlock(), + MarkdownBlock())); } + + /** + * Parses the rule: + * CodeBlock = !DOC_START RemainingLine + * + * Pushes a CodeBlock onto the stack. + */ + Rule CodeBlock() { + return Sequence( + TestNot(DOC_START), RemainingLine(), + ZeroOrMore(Sequence( + TestNot(DOC_START), RemainingLine()))); } + + /** + * Parses the rule: + * DirectiveBlock = + * DOC_START DIRECTIVE_START (LongDirective / LineDirective) + * + * Pushes a Directive onto the stack. + */ + Rule DirectiveBlock() { + return Sequence( + DOC_START, DIRECTIVE_START, + FirstOf(LongDirective(), LineDirective())); } + + /** + * Parses the rule: + * LongDirective = + * (AUTHOR_DIR / DOC_DIR / EXAMPLE_DIR) RemainingLine MarkdownBlock? + * + * Pushes a Directive object onto the value stack. + */ + Rule LongDirective() { + return Sequence( + FirstOf(AUTHOR_DIR, DOC_DIR, EXAMPLE_DIR), + RemainingLine(), + Optional(MarkdownBlock())); } + + /** + * Parses the rule: + * LineDirective = + * ORG_DIR RemainingLine + * + * Pushes a Directive object onto the value stack. + */ + Rule LineDirective() { + return Sequence( + ORG_DIR, + RemainingLine()); } + + /** + * Parses the rule: + * MarkdownBlock = MarkdownLine+ + * + * Pushes a MarkdownBlock onto the stack as a string. + */ + Rule MarkdownBlock() { return OneOrMore(MarkdownLine()); } + + /** + * Parses the rule: + * MarkdownLine = + * DOC_START !DIRECTIVE_START RemainingLine + * + * Pushes the line value (not including the DOC_START) onto the stack. + */ + Rule MarkdownLine() { + return Sequence( + DOC_START, TestNot(DIRECTIVE_START), RemainingLine()); } + + /** + * Parses the rule: + * RemainingLine = (!EOL)+ EOL + */ + @SuppressSubnodes + Rule RemainingLine() { + return Sequence(OneOrMore(NOT_EOL), EOL); } + + Rule DOC_START = String("%% "); + Rule EOL = FirstOf(Ch('\n'), EOI); + Rule NOT_EOL = Sequence(TestNot(EOL), ANY); + Rule DIRECTIVE_START= Ch('@'); + Rule SLASH = Ch('/'); + + // directive terminals + Rule AUTHOR_DIR = IgnoreCase("author"); + Rule DOC_DIR = IgnoreCase("doc"); + Rule EXAMPLE_DIR = IgnoreCase("example"); + Rule ORG_DIR = IgnoreCase("org"); + + String popAsString() { + return (String) pop(); } + + List addToList(ASTNode value, List list) { + list.add(value); + return list; } + + boolean printValueStack() { + for (int i = 0; i < getContext().getValueStack().size(); i++) { + System.out.println(i + ": " + peek(i)); } + return true; } +} diff --git a/src/main/com/jdblabs/jlp/ParserActions.groovy b/src/main/com/jdblabs/jlp/ParserActions.groovy new file mode 100644 index 0000000..7a3485c --- /dev/null +++ b/src/main/com/jdblabs/jlp/ParserActions.groovy @@ -0,0 +1,7 @@ +package com.jdblabs.jlp + +import org.parboiled.Action + +public class ParserActions { + +} diff --git a/src/main/com/jdblabs/jlp/ast/ASTNode.groovy b/src/main/com/jdblabs/jlp/ast/ASTNode.groovy new file mode 100644 index 0000000..6e66126 --- /dev/null +++ b/src/main/com/jdblabs/jlp/ast/ASTNode.groovy @@ -0,0 +1,5 @@ +package com.jdblabs.jlp.ast + +public interface ASTNode { + public int getLineNumber() +} diff --git a/src/main/com/jdblabs/jlp/ast/Directive.groovy b/src/main/com/jdblabs/jlp/ast/Directive.groovy new file mode 100644 index 0000000..73e9dd6 --- /dev/null +++ b/src/main/com/jdblabs/jlp/ast/Directive.groovy @@ -0,0 +1,26 @@ +package com.jdblabs.jlp.ast + +public class Directive implements ASTNode { + + public static enum DirectiveType { + Author, + Doc, + Example, + Org; + + public static DirectiveType parse(String typeString) { + valueOf(typeString.toLowerCase().capitalize()) } } + + public final DirectiveType type; + public final String value; + public final int lineNumber; + + public Directive(String value, String typeString, int lineNumber) { + this.value = value + this.type = DirectiveType.parse(typeString) + this.lineNumber = lineNumber } + + public int getLineNumber() { return lineNumber } + + public String toString() { return "[Directive(${lineNumber}): ${type}, ${value}]" } +} diff --git a/src/main/com/jdblabs/jlp/ast/TextBlock.groovy b/src/main/com/jdblabs/jlp/ast/TextBlock.groovy new file mode 100644 index 0000000..f42d10a --- /dev/null +++ b/src/main/com/jdblabs/jlp/ast/TextBlock.groovy @@ -0,0 +1,25 @@ +package com.jdblabs.jlp.ast + +public class TextBlock implements ASTNode { + + public static enum TextBlockType { MarkdownBlock, CodeBlock } + + public final TextBlockType type + public final String value + public final int lineNumber + + public TextBlock(TextBlockType type, String value, int lineNumber) { + this.type = type + this.value = value + this.lineNumber = lineNumber } + + public int getLineNumber() { return lineNumber } + + public String toString() { return "[${type}(${lineNumber}): ${value}]" } + + public static TextBlock makeMarkdownBlock(String value, int lineNumber) { + return new TextBlock(TextBlockType.MarkdownBlock, value, lineNumber) } + + public static TextBlock makeCodeBlock(String value, int lineNumber) { + return new TextBlock(TextBlockType.CodeBlock, value, lineNumber) } +}