Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.77% |
80 / 81 |
|
90.00% |
9 / 10 |
CRAP | |
0.00% |
0 / 1 |
RegexStream | |
98.77% |
80 / 81 |
|
90.00% |
9 / 10 |
31 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
nextToken | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
getIterator | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
createRepetition | |
100.00% |
32 / 32 |
|
100.00% |
1 / 1 |
11 | |||
createStaticCharacterMarker | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
createEscapedCharacter | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
createStartMarker | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
createCaptureGroup | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
6 | |||
createAnyMatch | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
4 | |||
createEndMarker | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | namespace Apie\RegexTools; |
3 | |
4 | use Apie\RegexTools\Parts\AnyMatch; |
5 | use Apie\RegexTools\Parts\CaptureGroup; |
6 | use Apie\RegexTools\Parts\EndOfRegex; |
7 | use Apie\RegexTools\Parts\EscapedCharacter; |
8 | use Apie\RegexTools\Parts\MatchOrMatch; |
9 | use Apie\RegexTools\Parts\OptionalToken; |
10 | use Apie\RegexTools\Parts\RegexPartInterface; |
11 | use Apie\RegexTools\Parts\RepeatToken; |
12 | use Apie\RegexTools\Parts\RepetitionToken; |
13 | use Apie\RegexTools\Parts\StartOfRegex; |
14 | use Apie\RegexTools\Parts\StaticCharacter; |
15 | use IteratorAggregate; |
16 | use Traversable; |
17 | |
/**
 * Splits a regular expression string into RegexPartInterface tokens.
 *
 * The stream keeps two copies of the regex: the untouched original (used by
 * getIterator()) and the not-yet-consumed remainder (shortened by every
 * nextToken() call).
 *
 * All scanning is byte based (substr/strlen), so a multi-byte character is
 * handed out one byte at a time.
 */
final class RegexStream implements IteratorAggregate
{
    /**
     * Maps the first byte of the remaining regex to the private factory method
     * that builds the part starting there. Bytes that are not listed are
     * handled by createStaticCharacterMarker().
     */
    public const METHODMAP = [
        '^' => 'createStartMarker',
        '$' => 'createEndMarker',
        '\\' => 'createEscapedCharacter',
        '(' => 'createCaptureGroup',
        '[' => 'createAnyMatch',
    ];

    /** Last part returned by nextToken(); null until the first token was produced. */
    private ?RegexPartInterface $previousPart = null;

    /** The complete, unconsumed regex as passed to the constructor. */
    private string $fullRegex;

    /**
     * @param string $regexToStream regex to tokenize; this promoted property is
     *                              consumed from the front while tokens are read
     */
    public function __construct(
        private string $regexToStream
    ) {
        $this->fullRegex = $regexToStream;
    }

    /**
     * Reads the next part from the front of the remaining regex.
     *
     * The consumed length is taken from the part's getRegexStringLength();
     * any quantifiers or alternation that directly follow are folded into the
     * returned part by createRepetition().
     *
     * @return RegexPartInterface|null null once the remaining regex is empty
     */
    public function nextToken(): ?RegexPartInterface
    {
        $firstCharacter = substr($this->regexToStream, 0, 1);
        if ($firstCharacter === '') {
            return null;
        }

        $method = self::METHODMAP[$firstCharacter] ?? 'createStaticCharacterMarker';
        // The factory has to run before the remainder is shortened: it reads
        // the part from the front of $this->regexToStream.
        /** @var RegexPartInterface $part */
        $part = $this->$method();
        $this->regexToStream = substr($this->regexToStream, $part->getRegexStringLength());

        $part = $this->createRepetition($part);
        $this->previousPart = $part;

        return $part;
    }

    /**
     * Returns an iterator over the complete regex, regardless of how much of it
     * nextToken() has already consumed.
     */
    public function getIterator(): Traversable
    {
        return new RegexPartIterator($this->fullRegex);
    }

    /**
     * Wraps $part in every quantifier that directly follows it.
     *
     * Handles `*`, `+`, `?`, `{n}`, `{n,}`, `{,m}` and `{n,m}` (stacked
     * quantifiers are applied from the inside out) as well as `|`, which turns
     * $part and the whole remaining regex into a MatchOrMatch and ends the
     * stream. Anything else leaves $part untouched.
     */
    private function createRepetition(RegexPartInterface $part): RegexPartInterface
    {
        while (true) {
            $firstCharacter = substr($this->regexToStream, 0, 1);

            if ($firstCharacter === '*' || $firstCharacter === '+' || $firstCharacter === '?') {
                $this->regexToStream = substr($this->regexToStream, 1);
                $part = match ($firstCharacter) {
                    '*' => new RepetitionToken($part),
                    // second argument is only passed for '+'
                    // (presumably "at least once" - confirm in RepetitionToken)
                    '+' => new RepetitionToken($part, true),
                    '?' => new OptionalToken($part),
                };
                continue;
            }

            if ($firstCharacter === '|') {
                // Everything after the pipe becomes the right-hand alternative,
                // so nothing is left to stream afterwards.
                $part = new MatchOrMatch(
                    [$part],
                    iterator_to_array(new self(substr($this->regexToStream, 1)))
                );
                $this->regexToStream = '';

                return $part;
            }

            if ($firstCharacter === '{') {
                // Range form. At least one bound is required: "{,}" is not a
                // quantifier and has to stay literal text.
                if (
                    preg_match('/^\{\s*(\d*)\s*,\s*(\d*)\s*\}/', $this->regexToStream, $matches) === 1
                    && ($matches[1] !== '' || $matches[2] !== '')
                ) {
                    $this->regexToStream = substr($this->regexToStream, strlen($matches[0]));
                    $minimum = $matches[1] === '' ? null : intval($matches[1]);
                    $maximum = $matches[2] === '' ? null : intval($matches[2]);
                    $part = new RepeatToken($part, $minimum, $maximum, $matches[0]);
                    continue;
                }
                // Exact count form. A digit is required: "{}" is literal text.
                if (preg_match('/^\{\s*(\d+)\s*\}/', $this->regexToStream, $matches) === 1) {
                    $this->regexToStream = substr($this->regexToStream, strlen($matches[0]));
                    $repeatCount = intval($matches[1]);
                    $part = new RepeatToken($part, $repeatCount, $repeatCount, $matches[0]);
                    continue;
                }
                // "{" without a valid quantifier body: leave it in the stream
                // so the next nextToken() call reads it as a static character.
            }

            return $part;
        }
    }

    /**
     * Builds a StaticCharacter from the first byte of the remaining regex.
     */
    private function createStaticCharacterMarker(): RegexPartInterface
    {
        return new StaticCharacter(substr($this->regexToStream, 0, 1));
    }

    /**
     * Builds the part for a backslash sequence.
     *
     * A backslash that is the very last byte has nothing to escape and is
     * returned as a static backslash; otherwise the byte after the backslash
     * is wrapped in an EscapedCharacter.
     */
    private function createEscapedCharacter(): RegexPartInterface
    {
        if (strlen($this->regexToStream) === 1) {
            return new StaticCharacter('\\');
        }

        return new EscapedCharacter(substr($this->regexToStream, 1, 1));
    }

    /**
     * Builds the part for `^`: a StartOfRegex when no part was produced before,
     * a static `^` character otherwise.
     */
    private function createStartMarker(): RegexPartInterface
    {
        if ($this->previousPart !== null) {
            return $this->createStaticCharacterMarker();
        }

        return new StartOfRegex();
    }

    /**
     * Builds a CaptureGroup from the parenthesised expression at the front of
     * the remaining regex.
     *
     * Nested parentheses are balanced and backslash-escaped bytes are skipped.
     * If the closing parenthesis is missing, everything after the opening
     * parenthesis is used as the group content, so no byte is lost.
     */
    private function createCaptureGroup(): RegexPartInterface
    {
        $length = strlen($this->regexToStream);
        $ptr = 1; // skip the opening "("
        $depth = 1;
        $closed = false;

        while ($ptr < $length) {
            $character = $this->regexToStream[$ptr];
            if ($character === '\\') {
                // skip the backslash and the byte it escapes
                $ptr += 2;
                continue;
            }
            $ptr++;
            if ($character === ')') {
                $depth--;
                if ($depth === 0) {
                    $closed = true;
                    break;
                }
            } elseif ($character === '(') {
                $depth++;
            }
        }

        // When closed, $ptr sits just behind the matching ")": strip both
        // delimiters. Otherwise only the opening "(" can be stripped.
        $insideCaptureGroup = $closed
            ? substr($this->regexToStream, 1, $ptr - 2)
            : substr($this->regexToStream, 1);

        return new CaptureGroup(
            iterator_to_array(new self($insideCaptureGroup))
        );
    }

    /**
     * Builds an AnyMatch from the bracket expression at the front of the
     * remaining regex.
     *
     * Backslash-escaped bytes are skipped while searching for the closing
     * bracket. If the closing bracket is missing, everything after the opening
     * bracket is used as the content, so no byte is lost.
     */
    private function createAnyMatch(): RegexPartInterface
    {
        $length = strlen($this->regexToStream);
        $ptr = 1; // skip the opening "["
        $closed = false;

        while ($ptr < $length) {
            $character = $this->regexToStream[$ptr];
            if ($character === '\\') {
                // skip the backslash and the byte it escapes
                $ptr += 2;
                continue;
            }
            $ptr++;
            if ($character === ']') {
                $closed = true;
                break;
            }
        }

        // Same slicing rule as in createCaptureGroup().
        $insideAnyMatch = $closed
            ? substr($this->regexToStream, 1, $ptr - 2)
            : substr($this->regexToStream, 1);

        return new AnyMatch(
            $insideAnyMatch
        );
    }

    /**
     * Builds the part for `$`.
     */
    private function createEndMarker(): RegexPartInterface
    {
        return new EndOfRegex();
    }
}