Compiler.pm 4.94 KB
Newer Older
root's avatar
root committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
package Pegex::Compiler;
use Pegex::Base;

use Pegex::Parser;
use Pegex::Pegex::Grammar;
use Pegex::Pegex::AST;
use Pegex::Grammar::Atoms;

# The grammar tree: assigned by parse(), replaced by combinate(), walked by
# native(), and read through the accessor by the to_* serializers.
has tree => ();

# Run the full pipeline on a piece of Pegex grammar text: parse it,
# combinate it, and convert the regexes to native Perl regexes.
#
# Arguments: the grammar text, followed by an optional list of rule names
# that is handed to combinate(); dashes in those names are turned into
# underscores first.
#
# Returns the compiler object. When $Pegex::Bootstrap is true that is a
# freshly created Pegex::Bootstrap object rather than the invocant.
sub compile {
    my ($self, $grammar, @rules) = @_;

    # Global request to use the Pegex bootstrap compiler
    my $compiler = $self;
    if ($Pegex::Bootstrap) {
        require Pegex::Bootstrap;
        $compiler = Pegex::Bootstrap->new;
    }

    # @rules is this sub's own copy, so it can be edited in place.
    s/-/_/g for @rules;

    $compiler->parse($grammar);
    $compiler->combinate(@rules);
    $compiler->native;

    return $compiler;
}

# Parse Pegex grammar text and store the result in the 'tree' slot.
#
# $input is handed unchanged to a Pegex::Parser that is built with a
# Pegex::Pegex::Grammar as its grammar and a Pegex::Pegex::AST as its
# receiver. Returns the invocant.
sub parse {
    my ($self, $input) = @_;

    my %parser_args = (
        grammar  => Pegex::Pegex::Grammar->new,
        receiver => Pegex::Pegex::AST->new,
    );
    $self->{tree} = Pegex::Parser->new(%parser_args)->parse($input);

    return $self;
}

#-----------------------------------------------------------------------------#
# Combination
#-----------------------------------------------------------------------------#
# Scratch tree that combinate() fills while it runs; combinate() moves it
# into 'tree' and deletes this slot before returning.
has _tree => ();

# Rebuild the tree so that it holds only what the requested rules need.
#
# @rule names the starting rules. When the list is empty the tree's
# '+toprule' entry is used instead; if that is not set either, nothing is
# changed. Every tree entry whose key starts with '+' is carried over as
# is. Returns the invocant.
sub combinate {
    my ($self, @rule) = @_;

    unless (@rule) {
        my $top = $self->{tree}->{'+toprule'}
            or return $self;
        @rule = ($top);
    }

    # Seed the scratch tree with the '+' entries
    my %kept;
    for my $key (keys %{ $self->{tree} }) {
        next unless $key =~ /^\+/;
        $kept{$key} = $self->{tree}->{$key};
    }
    $self->{_tree} = \%kept;

    for my $name (@rule) {
        $self->combinate_rule($name);
    }

    # The scratch tree becomes the tree; the scratch slot goes away.
    $self->{tree} = delete $self->{_tree};
    return $self;
}

# Copy one named rule from the parsed tree into the scratch tree and
# combinate the node it points to.
#
# Does nothing when the rule is already in the scratch tree; because the
# rule is stored before its node is walked, this also ends recursion on
# rules that refer back to themselves. Dies when the parsed tree holds no
# definition for $rule.
sub combinate_rule {
    my ($self, $rule) = @_;
    return if exists $self->{_tree}->{$rule};

    # Fail here with the rule's name rather than passing undef on to
    # combinate_object.
    my $object = $self->{tree}->{$rule};
    die "'$rule' not defined in the grammar"
        unless defined $object;

    $self->{_tree}->{$rule} = $object;
    $self->combinate_object($object);
}

# Walk one grammar node and pull everything it depends on into the scratch
# tree. What happens depends on which key the node carries:
#
#   .rgx  - expand the regex via combinate_re
#   .ref  - combinate the referenced rule; a name the tree does not have
#           is looked up among the Pegex::Grammar::Atoms atoms and, when
#           found there, added to the tree as a '.rgx' rule first
#   .any  - recurse into every element of the list
#   .all  - recurse into every element of the list
#   .err  - nothing to do
#
# Dies with a YAML dump of the node when it carries none of these keys.
sub combinate_object {
    my ($self, $object) = @_;

    if (exists $object->{'.rgx'}) {
        $self->combinate_re($object);
    }
    elsif (exists $object->{'.ref'}) {
        my $rule = $object->{'.ref'};
        if (exists $self->{tree}{$rule}) {
            $self->combinate_rule($rule);
        }
        # Same call form as combinate_re uses to fetch the atoms
        elsif (my $regex = Pegex::Grammar::Atoms->atoms->{$rule}) {
            $self->{tree}{$rule} = { '.rgx' => $regex };
            $self->combinate_rule($rule);
        }
    }
    # '.any' is tested before '.all', so it wins if a node has both.
    elsif (my ($group) = grep { exists $object->{$_} } '.any', '.all') {
        for my $elem (@{ $object->{$group} }) {
            $self->combinate_object($elem);
        }
    }
    elsif (exists $object->{'.err'}) {
        # Nothing to combinate for this kind of node.
    }
    else {
        require YAML::PP;
        die "Can't combinate:\n" . YAML::PP
            ->new(schema => ['Core', 'Perl'])
            ->dump_string($object);
    }
}

# Expand the shorthand inside a regex node until nothing changes any more.
#
# $regexp is a node hash; its '.rgx' string is rewritten in place. Each
# pass turns every run of '~' not preceded by a backslash into '<wsN>'
# (N being the length of the run) and then replaces the first '<name>'
# reference: with the '.rgx' of the tree rule of that name (dashes read as
# underscores) or, when the tree has no such rule, with the atom of that
# name. Dies when the name is a rule without a '.rgx', or is neither a
# rule nor an atom.
sub combinate_re {
    my ($self, $regexp) = @_;
    my $atoms = Pegex::Grammar::Atoms->atoms;

    # Map one <name> reference to the regex text it stands for
    my $expand = sub {
        (my $key = $_[0]) =~ s/-/_/g;
        if (my $rule = $self->{tree}->{$key}) {
            return $rule->{'.rgx'}
                || die "'$key' not defined as a single RE";
        }
        return $atoms->{$key}
            || die "'$key' not defined in the grammar";
    };

    my $re = $regexp->{'.rgx'};
    my $settled = 0;
    until ($settled) {
        $re =~ s{(?<!\\)(~+)}{'<ws' . length($1) . '>'}ge;
        # No /g: one reference per pass, the loop picks up the rest
        $re =~ s{<([\w\-]+)>}{$expand->($1)}e;
        if ($re eq $regexp->{'.rgx'}) {
            $settled = 1;
        }
        else {
            $regexp->{'.rgx'} = $re;
        }
    }
}

#-----------------------------------------------------------------------------#
# Compile to native Perl regexes
#-----------------------------------------------------------------------------#
# Run perl_regexes over the whole tree so that its '.rgx' entries become
# compiled Perl regexes. Returns the invocant.
sub native {
    my $self = shift;
    my $tree = $self->{tree};
    $self->perl_regexes($tree);
    return $self;
}

# Recursively replace '.rgx' strings with compiled regexes, in place.
#
# A hash that has a '.rgx' entry gets that entry recompiled as
# qr/\G.../ and is not descended into any further. Other hashes have
# every value visited, arrays have every element visited, and anything
# that is neither a plain hash nor a plain array is left alone.
sub perl_regexes {
    my ($self, $node) = @_;
    my $type = ref $node;

    if ($type eq 'ARRAY') {
        for my $item (@$node) {
            $self->perl_regexes($item);
        }
    }
    elsif ($type eq 'HASH' and exists $node->{'.rgx'}) {
        my $source = $node->{'.rgx'};
        $node->{'.rgx'} = qr/\G$source/;
    }
    elsif ($type eq 'HASH') {
        for my $child (values %$node) {
            $self->perl_regexes($child);
        }
    }
}

#-----------------------------------------------------------------------------#
# Serialization formatter methods
#-----------------------------------------------------------------------------#
# Serialize the grammar tree as YAML.
#
# YAML::PP is loaded on demand and created with the 'Core' and 'Perl'
# schemas. Returns whatever its dump_string gives back for the tree.
sub to_yaml {
    my $self = shift;
    require YAML::PP;
    return YAML::PP
        ->new(schema => ['Core', 'Perl'])
        ->dump_string($self->tree);
}

# Serialize the grammar tree as JSON.
#
# JSON::PP is loaded on demand; the encoder is set to utf8, canonical
# and pretty. Returns the encoded text.
sub to_json {
    my $self = shift;
    require JSON::PP;
    return JSON::PP
        ->new
        ->utf8
        ->canonical
        ->pretty
        ->encode($self->tree);
}

# Serialize the grammar tree as Perl source text via Data::Dumper
# (Terse, Indent 1, sorted keys).
#
# The dump is then post-processed: '?^:' and '?^u:' become '?-xism:', the
# '(?-xism:...)' wrapper is removed from qr// literals that follow a
# '.rgx' key, and a '/u' at the end of a line loses its 'u'. Dies if the
# text still contains '?^' afterwards. Returns the resulting text.
sub to_perl {
    my $self = shift;
    require Data::Dumper;
    no warnings 'once';
    # local, so the caller's own Data::Dumper settings are back in place
    # once this sub returns.
    local $Data::Dumper::Terse = 1;
    local $Data::Dumper::Indent = 1;
    local $Data::Dumper::Sortkeys = 1;
    my $perl = Data::Dumper::Dumper($self->tree);
    $perl =~ s/\?\^u?:/?-xism:/g; # the "u" is perl 5.14-18 equiv of /u
    $perl =~ s!(\.rgx.*?qr/)\(\?-xism:(.*)\)(?=/)!$1$2!g;
    $perl =~ s!/u$!/!gm; # perl 5.20+ put /u, older perls don't understand
    die "to_perl failed with non compatible regex in:\n$perl"
        if $perl =~ /\?\^/;
    return $perl;
}

1;