Worked on documentation, parser.

* Added planning documentation regarding the process.
* Updated grammar.
* Refactored the test code a bit.
* Added sample input file from vbs-suite
* Refactored the AST node structure created by the parser.
This commit is contained in:
Jonathan Bernard 2011-08-29 09:44:05 -05:00
parent c275fd0ce1
commit 557feaeb83
8 changed files with 398 additions and 53 deletions

View File

@ -1,8 +1,8 @@
CodePage -> DocBlock / CodeBlock
SourceFile -> (DocBlock / CodeBlock)*
DocBlock -> DirectiveBlock / MarkdownBlock
DocBlock -> (DirectiveBlock / MarkdownBlock)+
CodeBlock -> !DOC_START RemainingLine
CodeBlock -> ((!DOC_START RemainingLine) / EmptyLine)+
DirectiveBlock -> DOC_START DIRECTIVE_START (LongDirective / LineDirective)
@ -15,7 +15,9 @@ LineDirective -> ORG_DIR RemainingLine
MarkdownLine -> DOC_START !DIRECTIVE_START RemainingLine
RemainingLine -> (!EOL)+, EOL
RemainingLine -> (!EOL)+ (EOL / EOI)
EmptyLine -> EOL
Tokens
------

52
doc/phase-doc.txt Normal file
View File

@ -0,0 +1,52 @@
Parse phase
===========
Init: none
Input: Map<Name, InputStream>
Output: Map<Name, ASTNode List>
Generate phase
==============
Input: Map<Name, ASTNode List>
Output: Map<Name, String>
Emitter (object)
----------------
Emitter is good for one emit run.
Object fields:
* where the value goes
* current emit state (may need existing state from generate phase)
+--Generate---------------------------------+
| |
| GenerationState--+ |
| v |
| +->Emitter>-+ |
| [Sources]>-+->Emitter>-+->[Destinations] |
| +->Emitter>-+ |
| |
| |
+-------------------------------------------+
+--Emitter----------+
| |
| GenerationState |
| Source |
| Output |
| |
+-------------------+
Emit Process:
Order nodes, emit based on type.
Refer to generation state when necessary.
Emitters:
FormattingEmitter - accepts a parameter, formatter, that formats a block of
text.

View File

@ -1,3 +1,4 @@
import com.jdblabs.jlp.EchoEmitter
import com.jdblabs.jlp.JLPPegParser
import org.parboiled.Parboiled
import org.parboiled.parserunners.ReportingParseRunner
@ -6,16 +7,33 @@ import org.parboiled.parserunners.RecoveringParseRunner
parser = Parboiled.createParser(JLPPegParser.class)
parseRunner = new RecoveringParseRunner(parser.SourceFile())
testLine = """%% This the first test line.
%% Second Line
%% Third Line
Fourth line
%% Fifth line
%% @author Sixth Line
%% @Example Seventh Line
%% Markdown lines (eigth line)
%% Still markdown (ninth line)
Tenth line is a code line
"""
result = parseRunner.run(testLine)
// Parses the inline sample document with the script's parseRunner and returns
// the value of parseRunner.run(testLine); the caller decides where to store it
// (the banner suggests a variable named 'result').
simpleTest = {
    // A bare string expression is evaluated and discarded, so the banner has
    // to be printed explicitly to be seen.
    println "Parsing the simple test into 'result'.\n" +
        "--------------------------------------\n"

    // The sample lines stay at column 0: the doc-comment marker must be the
    // first thing on each line of the parsed input.
    testLine = """%% This the first test line.
%% Second Line
%% Third Line
Fourth line
%% Fifth line
%% @author Sixth Line
%% @Example Seventh Line
%% Markdown lines (eigth line)
%% Still markdown (ninth line)
Tenth line is a code line
"""

    parseRunner.run(testLine)
}
// Reads vbs_db_records.hrl from the working directory, parses its text with the
// script's parseRunner, and returns the value of parseRunner.run(...); the
// caller decides where to store it (the banner suggests 'vbsResult').
vbsTest = {
    // The two banner strings were separate expression statements that were
    // evaluated and discarded; join them and print them so the run is announced.
    println "Parsing vbs_db_records.hrl into 'vbsResult'.\n" +
        "--------------------------------------------\n"

    vbsTestFile = new File('vbs_db_records.hrl')
    println "vbsTestFile is ${vbsTestFile.exists() ? 'present' : 'absent'}."

    // File.text reads the whole file; this throws if the file is absent.
    vbsTestInput = vbsTestFile.text
    parseRunner.run(vbsTestInput)
}

View File

@ -0,0 +1,238 @@
% vbs_db_records.erl
%% @author Jonathan Bernard <jdb@jdb-labs.com>
%% @doc
%% The VBS database API is centered around the data records:
%%
%% * Tables are named after the records. ``vbs_adult`` records are stored in
%% a table named ``vbs_adult``.
%% * The functions that make up the database API are grouped into modules named
%% after the records on which they operate. The ``vbs_adult`` module contains
%% the standard VBS database API functions that work with ``vbs_adult``
%% records.
%%
%% @section Record Definitions
%% Here are the record definitions:
%% @doc Information about an adult in the VBS system.
-record(vbs_adult, {
%% @doc A unique number. This is the record's primary identification and
%% the table's primary key
id,
%% @doc A unique full name.
%% @example "John Smith", "Fae Alice McDonald"
name,
%% @doc The adult's age (optional).
age = 0,
%% @doc A list of phone numbers (strings).
%% @example ["512-555-1155", "123-456-7890"]
phone_numbers,
%% @doc The adult's address (optional). There is no pre-defined format,
%% this is a string that can be formatted as desired (linebreaks are ok,
%% for example).
%% @example
%%
%% "123 Grant Drive
%% Plainsville, TX, 78707"
address = "",
%% @doc The adult's email address as a string.
%% @example "john_smith@mailco.com"
email = ""}).
%% @doc An entry recording a person's attendance.
-record(vbs_attendance, {
%% @doc A unique number. This is the record's primary identification and the
%% table's primary key.
id,
%% @doc The id of person who attended. This is a foreign key onto either the
%% [`vbs_worker`](doc://records/vbs_worker) or
%% [`vbs_child`](doc://records/vbs_child) table, depending on the value of
%% the [`person_type`](doc://records/vbs_attendance/person_type) field.
person_id,
%% @doc The type of person who attended. This determines which table the
%% [`person_id`](doc://records/vbs_attendance/person_id) links on. The
%% possible values and the corresponding link tables are:
%%
%% ======== ========================================
%% `child` [`vbs_child`](doc://records/vbs_child)
%% `worker` [`vbs_worker`](doc://records/vbs_worker)
%% ======== ========================================
%%
person_type,
%% @doc The date of attendance, stored as {Year, Month, Day}.
%% @example {2011, 6, 14}
date = {1900, 1, 1},
%% @doc A timestamp taken when the person was signed in, stored as
%% {Hour, Minute, Second}
%% @example {5, 22, 13}
sign_in = false, % {hour, minute, second}
%% @doc A timestamp taken when the person is signed out, stored as
%% {Hour, Minute, Second}
sign_out = false, % {hour, minute, second}
%% @doc A list of {Key, Value} pairs that can be used to store additional
%% information. This is intended to allow callers to store optional data,
%% or client-specific data, without having to alter the database schema.
%% When working with `vbs_attendance` records, a caller should ignore
%% `ext_data` values it does not understand
ext_data = [],
%% @doc Any comments for the day about this person.
comments = ""}).
%% @doc Information about a child in the VBS program.
-record(vbs_child, {
%% @doc A unique number. This is the record's primary identification and the
%% table's primary key.
id,
%% @doc The id of the crew to which this child has been assigned. This is a
%% foreign key linking to a [`vbs_crew.id`](doc://records/vbs_crew/id).
crew_id,
%% @doc The child's full name.
%% @example "Mary Scott", "Gregory Brown"
name,
%% @doc The child's date of birth, stored as {Year, Month, Day}
%% @example {1998, 12, 22}
date_of_birth,
%% @doc The child's gender, either `male` or `female`
gender,
%% @doc The child's grade level in school.
grade,
%% @doc A list of ids representing the child's legal guardians. These link
%% the child record to adult records by the
%% [`vbs_adult.id`](doc://records/vbs_adult/id)
%% @example [4, 5]
guardian_ids,
%% @doc A list of ids, similar to `guardian_ids`, but representing the
%% adults that are allowed to pick the children up. These link the child
%% record to adult records by
%% [`vbs_adult.id`](doc://records/vbs_adult/id).
pickup_ids,
%% @doc A list of ids, similar to `guardian_ids` and `pickup_ids`, but
%% representing adults that should be contacted if there is an emergency
%% involving this child (injury, for example). These link the child record
%% to adult records by [`vbs_adult.id`](doc://records/vbs_adult/id).
emerency_ids,
%% @doc The child's home church, usually used if they are not a member of
%% the hosting church.
home_church,
%% @doc If this child is a visitor, this is used to track who invited them,
%% or who brought them.
visitor_of,
%% @doc Answers the question: Is this child a visitor? Valid values are
%% `true` and `false`.
is_visitor,
%% @doc The date the child registered, stored as {Year, Month, Day}
registration_date,
%% @doc The child's shirt size, stored as a string.
shirt_size,
%% @doc Any special needs this child has that should be accommodated.
special_needs,
%% @doc Any known allergies this child has.
allergies,
%% @doc Additional comments about this child.
comments}).
%% @doc Information about a crew in the VBS system.
-record(vbs_crew, {
%% @doc A unique number. This is the record's primary identification and the
%% table's primary key.
id, % primary key
%% @doc The crew number.
number,
%% @doc The crew type. This is a foreign key on
%% [`vbs_crew_type.id`](doc://records/vbs_crew_type/id).
crew_type_id, % foreign key onto crew_type
%% @doc The name of the crew, stored as a string.
name,
%% @doc Any comments about the crew.
comments = ""}).
%% @doc Information about a crew type. Crew types are often used when a VBS
%% program has separate activities set up for different types of children
%% (usually based on age). For example, having two types: Elementary and Pre-K
%% is common when there is a separate set of activities for smaller children.
-record(vbs_crew_type, {
%% @doc A unique number. This is the record's primary identification and the
%% table's primary key.
id,
%% @doc The displayed name of the crew type.
name}).
%% @doc The id counter records are used to keep track of the next valid id for a
%% specific purpose. This is how the unique id fields in other records are
%% implemented.
-record(vbs_id_counter, {
%% @doc A name for the counter. This is the primary key for the table and
%% must be unique.
%% @example `vbs_adult_id`
name, % primary key
%% @doc The next value for this counter.
next_value = 0}).
%% @doc Information about workers involved in the VBS program.
-record(vbs_worker, {
%% @doc A unique number. This is the record's primary identification and the
%% table's primary key.
id,
%% @doc Links this worker record to a [`vbs_adult`](doc://records/vbs_adult)
adult_id, % foreign key on adult
%% @doc The crew this worker is assigned to. This is a link to
%% [`vbs_crew.id`](doc://records/vbs_crew/id). The most common way to deal
%% with workers who are not assigned to a particular crew is to create a
%% special administrative crew and assign all these workers to that crew.
crew_id = 0,
%% @doc
worker_type_id, % foreign key on worker_type
%% @doc
shirt_size,
%% @doc
ext_data = []}).
-record(vbs_worker_type, {
id, % primary key
name}).

View File

@ -1,5 +1,6 @@
package com.jdblabs.jlp
import com.jdblabs.jlp.ast.ASTNode
import org.parboiled.Parboiled
import org.parboiled.parserunners.ReportingParseRunner
@ -31,8 +32,10 @@ public class JLPMain {
def files = filenames.collect { new File(it) }
// -------- parse input -------- //
Map parsed = files.inject([:]) { docContext, file ->
inst.parse(new File(file), docContext) }
Map parsedFiles = files.inject([:]) { acc, file ->
def parsed = inst.parse(new File(file))
acc[file.canonicalPath] = parsed
return acc }
// -------- generate output -------- //
}
@ -41,11 +44,14 @@ public class JLPMain {
parser = Parboiled.createParser(JLPPegParser.class)
}
public Map parse(File inputFile, Map docCtx) {
public Map parse(File inputFile) {
def parseRunner = new ReportingParseRunner(parser.SourceFile())
// parse the file
def firstPass = parseRunner.run(inputFile)
return parseRunner.run(inputFile).resultValue
}
public def generate(def emitter, List<ASTNode> blocks) {
// second pass, semantics
}
}

View File

@ -10,7 +10,7 @@ import org.parboiled.Rule;
import org.parboiled.annotations.*;
import static com.jdblabs.jlp.ast.TextBlock.makeCodeBlock;
import static com.jdblabs.jlp.ast.TextBlock.makeMarkdownBlock;
import static com.jdblabs.jlp.ast.TextBlock.makeTextBlock;
@BuildParseTree
public class JLPPegParser extends BaseParser<Object> {
@ -20,7 +20,7 @@ public class JLPPegParser extends BaseParser<Object> {
public Rule SourceFile() {
return Sequence(
clearLineCount(),
push(new ArrayList<Object>()),
push(new ArrayList<ASTNode>()),
ZeroOrMore(Sequence(
FirstOf(
DocBlock(),
@ -29,22 +29,19 @@ public class JLPPegParser extends BaseParser<Object> {
/**
* Parses the rule:
* DocBlock = DirectiveBlock / MarkdownBlock
* DocBlock = (DirectiveBlock / DocTextBlock)+
*
* Pushes a DocBlock object onto the stack.
*/
Rule DocBlock() {
return Sequence(
push(new ArrayList<ASTNode>()),
OneOrMore(Sequence(
push(new DocBlock(curLineNum)),
OneOrMore(
FirstOf(
DirectiveBlock(),
MarkdownBlock()),
// stack is now: [List<ASTNode>, BlockValue *top*]
// pop the Block, then List, pass to helper to add the
// Block to the list, then push the List back on
push(addToList((ASTNode)pop(), (List<ASTNode>)pop()))))); }
Sequence(DirectiveBlock(),
push(addDirectiveBlock((Directive) pop(), (DocBlock) pop()))),
Sequence(DocTextBlock(),
push(addTextBlock((TextBlock) pop(), (DocBlock) pop())))))); }
/**
* Parses the rule:
@ -55,13 +52,15 @@ public class JLPPegParser extends BaseParser<Object> {
Rule CodeBlock() {
return Sequence(
push(curLineNum),
TestNot(DOC_START), RemainingLine(), push(match()),
ZeroOrMore(Sequence(
TestNot(DOC_START), RemainingLine(),
push(popAsString() + match()))),
push(""),
OneOrMore(FirstOf(
Sequence(
TestNot(DOC_START), RemainingLine(),
push(popAsString() + match())),
Sequence(EmptyLine(),
push(popAsString() + match())))),
push(makeCodeBlock(popAsString(),popAsInt()))); }
push(makeCodeBlock(popAsString(), popAsInt()))); }
/**
* Parses the rule:
* DirectiveBlock =
@ -77,7 +76,7 @@ public class JLPPegParser extends BaseParser<Object> {
/**
* Parses the rule:
* LongDirective =
* (AUTHOR_DIR / DOC_DIR / EXAMPLE_DIR) RemainingLine MarkdownBlock?
* (AUTHOR_DIR / DOC_DIR / EXAMPLE_DIR) RemainingLine DocTextBlock?
*
* Pushes a Directive object onto the value stack.
*/
@ -87,7 +86,7 @@ public class JLPPegParser extends BaseParser<Object> {
FirstOf(AUTHOR_DIR, DOC_DIR, EXAMPLE_DIR), push(match()),
RemainingLine(), push(match()),
Optional(Sequence(
MarkdownBlock(), // pushes block
DocTextBlock(), // pushes block
swap(),
push(popAsString() + ((TextBlock) pop()).value))),
@ -112,29 +111,29 @@ public class JLPPegParser extends BaseParser<Object> {
/**
* Parses the rule:
* MarkdownBlock = MarkdownLine+
* DocTextBlock = DocTextLine+
*
* Pushes a MarkdownBlock onto the stack as a string.
* Pushes a DocTextBlock onto the stack as a string.
*/
Rule MarkdownBlock() {
Rule DocTextBlock() {
return Sequence(
push(curLineNum),
MarkdownLine(), // pushes the value onto the stack
DocTextLine(), // pushes the value onto the stack
ZeroOrMore(Sequence(
MarkdownLine(),
DocTextLine(),
swap(),
push(popAsString() + popAsString()))),
push(makeMarkdownBlock(popAsString(), popAsInt()))); }
push(makeTextBlock(popAsString(), popAsInt()))); }
/**
* Parses the rule:
* MarkdownLine =
* DocTextLine =
* DOC_START !DIRECTIVE_START RemainingLine
*
* Pushes the line value (not including the DOC_START) onto the stack.
*/
Rule MarkdownLine() {
Rule DocTextLine() {
return Sequence(
DOC_START, TestNot(DIRECTIVE_START),
RemainingLine(), push(match())); }
@ -145,13 +144,17 @@ public class JLPPegParser extends BaseParser<Object> {
*/
@SuppressSubnodes
Rule RemainingLine() {
return Sequence(OneOrMore(NOT_EOL), EOL, incLineCount()); }
return Sequence(OneOrMore(NOT_EOL), FirstOf(EOL, EOI), incLineCount()); }
/**
 * Parses the rule:
 *  EmptyLine = EOL
 *
 * Matches a single EOL and then calls incLineCount(), so a line holding
 * nothing but its terminator still advances the line counter. This rule
 * does not push anything onto the value stack itself.
 */
Rule EmptyLine() {
return Sequence(EOL, incLineCount()); }
Rule DOC_START = String("%% ");
Rule EOL = FirstOf(Ch('\n'), EOI);
Rule EOL = Ch('\n');
Rule NOT_EOL = Sequence(TestNot(EOL), ANY);
Rule DIRECTIVE_START= Ch('@');
Rule SLASH = Ch('/');
Rule SPACE = AnyOf(" \t");
// directive terminals
Rule AUTHOR_DIR = IgnoreCase("author");
@ -175,4 +178,12 @@ public class JLPPegParser extends BaseParser<Object> {
boolean clearLineCount() { curLineNum = 1; return true; }
boolean incLineCount() { curLineNum++; return true; }
boolean echo(String msg) { System.out.println(msg); return true; }
/**
 * Appends a directive to a DocBlock's directive list.
 *
 * @param dir the Directive to append.
 * @param docBlock the DocBlock whose directives list receives it.
 * @return the same docBlock instance that was passed in.
 */
static DocBlock addDirectiveBlock(Directive dir, DocBlock docBlock) {
    final DocBlock target = docBlock;
    target.directives.add(dir);
    return target;
}
/**
 * Appends a text block to a DocBlock's text block list.
 *
 * @param tb the TextBlock to append.
 * @param docBlock the DocBlock whose textBlocks list receives it.
 * @return the same docBlock instance that was passed in.
 */
static DocBlock addTextBlock(TextBlock tb, DocBlock docBlock) {
    final DocBlock target = docBlock;
    target.textBlocks.add(tb);
    return target;
}
}

View File

@ -0,0 +1,18 @@
package com.jdblabs.jlp.ast
import java.util.ArrayList
import java.util.List
/**
 * AST node for a documentation block. It records the line number supplied at
 * construction and carries two lists, one of directives and one of text
 * blocks, both of which start out empty.
 */
public class DocBlock implements ASTNode {

    /** Line number supplied to the constructor. */
    public final int lineNumber

    /** Directives attached to this block; starts empty. */
    public List<Directive> directives = new ArrayList<Directive>()

    /** Text blocks attached to this block; starts empty. */
    public List<TextBlock> textBlocks = new ArrayList<TextBlock>()

    /**
     * @param lineNumber the line number to record for this block.
     */
    public DocBlock(int lineNumber) {
        this.lineNumber = lineNumber
    }

    /** @return the line number supplied at construction. */
    public int getLineNumber() {
        return this.lineNumber
    }

    /** @return a bracketed summary listing the directives and text blocks. */
    public String toString() {
        return "[DocBlock: Directives ${directives}, TextBlocks ${textBlocks}]"
    }
}

View File

@ -2,7 +2,7 @@ package com.jdblabs.jlp.ast
public class TextBlock implements ASTNode {
public static enum TextBlockType { MarkdownBlock, CodeBlock }
public static enum TextBlockType { TextBlock, CodeBlock }
public final TextBlockType type
public final String value
@ -17,8 +17,8 @@ public class TextBlock implements ASTNode {
public String toString() { return "[${type}(${lineNumber}): ${value}]" }
public static TextBlock makeMarkdownBlock(String value, int lineNumber) {
return new TextBlock(TextBlockType.MarkdownBlock, value, lineNumber) }
public static TextBlock makeTextBlock(String value, int lineNumber) {
return new TextBlock(TextBlockType.TextBlock, value, lineNumber) }
public static TextBlock makeCodeBlock(String value, int lineNumber) {
return new TextBlock(TextBlockType.CodeBlock, value, lineNumber) }