1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
package urn
import (
"fmt"
)
// Format strings for the errors produced by the error actions of the urn
// state machine. Each one takes a single %d argument, which the actions fill
// with m.p (the position the scanner had reached when the error fired).
var (
// errPrefix is used by the err_pre action.
errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
// errIdentifier is used by the err_nid action.
errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]"
// errSpecificString is used by the err_nss action.
errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
// errNoUrnWithinID is used by the err_urn action.
errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
// errHex is used by the err_hex action; the doubled %% yields a literal percent sign.
errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]"
// errParse is used by the err_parse action.
errParse = "parsing error [col %d]"
)
%%{
# Ragel specification of the URN scanner.
#
# The action bodies are Go code that the generator places inside Parse, so
# they refer to m (the machine) and to output (the URN being filled in).
machine urn;

# unsigned alphabet
alphtype uint8;

# Remember where the current token starts.
action mark {
    m.pb = m.p
}

# Record the offset (relative to the last mark) of a byte that has to be
# lowered when the normalized specific string is built.
action tolower {
    m.tolower = append(m.tolower, m.p - m.pb)
}

# Store the matched prefix.
action set_pre {
    output.prefix = string(m.text())
}

# Store the matched namespace identifier.
action set_nid {
    output.ID = string(m.text())
}

# Store the matched specific string, both verbatim (SS) and normalized (norm).
action set_nss {
    raw := m.text()
    output.SS = string(raw)
    // raw aliases the input slice given to Parse, so the lowering is done on
    // a private copy and the input is never modified.
    // When no byte was recorded the normalized form equals the verbatim one.
    output.norm = output.SS
    if len(m.tolower) > 0 {
        norm := []byte(output.SS)
        // Iterate upper letters lowering them
        for _, i := range m.tolower {
            norm[i] += 32
        }
        output.norm = string(norm)
    }
}

# Error actions: each one stores a formatted error carrying the current
# position, holds the current byte (fhold) and jumps to the fail machine.
action err_pre {
    m.err = fmt.Errorf(errPrefix, m.p)
    fhold;
    fgoto fail;
}

action err_nid {
    m.err = fmt.Errorf(errIdentifier, m.p)
    fhold;
    fgoto fail;
}

action err_nss {
    m.err = fmt.Errorf(errSpecificString, m.p)
    fhold;
    fgoto fail;
}

action err_urn {
    m.err = fmt.Errorf(errNoUrnWithinID, m.p)
    fhold;
    fgoto fail;
}

action err_hex {
    m.err = fmt.Errorf(errHex, m.p)
    fhold;
    fgoto fail;
}

action err_parse {
    m.err = fmt.Errorf(errParse, m.p)
    fhold;
    fgoto fail;
}

# Prefix: the three letters u, r, n in any case.
pre = ([uU][rR][nN] @err(err_pre)) >mark %set_pre;

# Namespace identifier: one alnum followed by up to 31 alnum or dash bytes.
nid = (alnum >mark (alnum | '-'){0,31}) %set_nid;

# Percent escape: a percent sign followed by two bytes; upper letters are
# recorded through the tolower action.
hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);

# Single bytes allowed in the specific string besides percent escapes.
sss = (alnum | [()+,\-.:=@;$_!*']);

# Namespace specific string: one or more allowed bytes or percent escapes.
nss = (sss | hex)+ $err(err_nss);

# Failure machine: consumes anything but newline and carriage return, and
# jumps back to main on error.
fail := (any - [\n\r])* @err{ fgoto main; };

# Entry point: prefix, colon, identifier (which must not itself match the
# prefix), colon, specific string.
main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) $err(err_parse);
}%%
%% write data noerror noprefix;
// Machine is the interface representing the FSM
// that turns a byte slice into a URN.
type Machine interface {
// Error reports the error of the last Parse call (nil when it succeeded),
// as implemented by the machine type in this file.
Error() error
// Parse scans input and returns the resulting URN, or a nil URN and an
// error when the input is rejected.
Parse(input []byte) (*URN, error)
}
// machine is the Machine implementation returned by NewMachine; it holds the
// state of the Ragel-generated scanner between the steps of a Parse call.
type machine struct {
// data is the input currently being parsed (assigned by Parse).
data []byte
// cs is the current state of the generated state machine.
cs int
// p is the current position in data; pe and eof are both set to len(input)
// by Parse; pb is the start of the current token (set by the mark action).
p, pe, eof, pb int
// err is the error stored by the error actions, reset to nil by Parse.
err error
// tolower collects offsets, relative to pb, of bytes to lower when the
// normalized specific string is built.
tolower []int
}
// NewMachine creates a new FSM able to parse RFC 2141 strings.
//
// The returned value is a pointer to a zero-valued machine; all per-input
// state is (re)initialized by Parse.
func NewMachine() Machine {
m := &machine{}
// Ragel directives (they emit no code here): make the generated scanner
// access its state and its p, pe, eof and data variables through m.
%% access m.;
%% variable p m.p;
%% variable pe m.pe;
%% variable eof m.eof;
%% variable data m.data;
return m
}
// Error returns the error that occurred on the last call to Parse.
//
// If the result is nil, then the input was parsed successfully.
func (m *machine) Error() error {
return m.err
}
// text returns the part of the input between the last marked position (m.pb)
// and the current position (m.p). The result shares memory with m.data.
func (m *machine) text() []byte {
	start, end := m.pb, m.p
	return m.data[start:end]
}
// Parse parses the input byte array as a RFC 2141 string.
//
// It resets the machine, runs the generated scanner over input and returns
// the populated URN. When the scanner does not end in a final state, or ends
// in the fail machine, it returns a nil URN together with the stored error.
func (m *machine) Parse(input []byte) (*URN, error) {
	// Reset every piece of per-input state before scanning.
	size := len(input)
	m.data, m.err = input, nil
	m.p, m.pb = 0, 0
	m.pe, m.eof = size, size
	m.tolower = []int{}

	// The scanner actions fill this value in.
	output := &URN{}

	%% write init;
	%% write exec;

	if m.cs >= first_final && m.cs != en_fail {
		return output, nil
	}
	return nil, m.err
}
|