Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.74% |
90 / 94 |
|
81.82% |
9 / 11 |
CRAP | |
0.00% |
0 / 1 |
AnyMatch | |
95.74% |
90 / 94 |
|
81.82% |
9 / 11 |
34 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
__toString | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRegexStringLength | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMinimalPossibleLength | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMaximumPossibleLength | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeAllIncludedArray | |
93.48% |
43 / 46 |
|
0.00% |
0 / 1 |
15.06 | |||
toCaseInsensitive | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
toDotAll | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
createRange | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
5 | |||
renderRange | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
removeStartAndEndMarkers | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | namespace Apie\RegexTools\Parts; |
3 | |
/**
 * Regex part for a bracketed character class such as `[a-z]` or `[^0-9]`.
 *
 * `$part` holds the text between the square brackets (including a leading `^`
 * when the class is negated); `__toString()` adds the brackets back.
 */
final class AnyMatch implements RegexPartInterface
{
    /**
     * @param string $part contents of the character class without the surrounding brackets
     */
    public function __construct(
        public readonly string $part
    ) {
    }

    /**
     * Renders the class back into regex syntax by wrapping the part in brackets.
     */
    public function __toString(): string
    {
        return '[' . $this->part . ']';
    }

    /**
     * Length of the rendered class: the part plus the two brackets.
     *
     * NOTE(review): strlen() counts bytes, so a multibyte part is longer than its
     * character count. Confirm that callers expect a byte length here.
     */
    public function getRegexStringLength(): int
    {
        return 2 + strlen($this->part);
    }

    /**
     * A character class always consumes exactly one character.
     */
    public function getMinimalPossibleLength(): int
    {
        return 1;
    }

    /**
     * A character class always consumes exactly one character.
     */
    public function getMaximumPossibleLength(): int
    {
        return 1;
    }

    /**
     * Expands the class into the sorted, de-duplicated list of members it names.
     *
     * Unescaped characters and every member of an `a-z` style range are passed
     * through $callback, which returns the characters that should end up in the
     * result for that input. Escaped punctuation (`\]`, `\\`, `\-`) is added as the
     * bare character without consulting the callback. Escape sequences that stand
     * for something other than the character itself (`\d`, `\w`, `\n`, `\p{L}`,
     * `\x41`, ...) are kept verbatim, backslash included, and returned after the
     * single characters.
     *
     * A leading `^` is skipped; callers are responsible for re-adding it.
     *
     * Known limitation: a range whose lower bound is written as an escape (for
     * example `\!-0`) is not recognised as a range.
     *
     * @param callable(string): list<string> $callback
     * @return list<string> single characters in code point order, followed by escape tokens
     */
    private function makeAllIncludedArray(callable $callback): array
    {
        $characters = mb_str_split($this->part);
        $total = count($characters);
        $literals = [];
        $escapes = [];
        // str_starts_with() is safe for an empty part, unlike indexing offset 0.
        $index = str_starts_with($this->part, '^') ? 1 : 0;

        while ($index < $total) {
            $character = $characters[$index];

            if ($character === '\\') {
                $escaped = $characters[$index + 1] ?? null;
                if ($escaped === null) {
                    // Dangling backslash at the end of the part: nothing to add.
                    break;
                }
                // Backslash + ASCII letter or octal digit has a special meaning in PCRE,
                // so it must keep its backslash. `\8` and `\9` are plain digits in a class.
                if (preg_match('/\A[A-Za-z0-7]\z/', $escaped) === 1) {
                    [$token, $consumed] = self::readEscapeSequence($characters, $index + 1);
                    $escapes[] = $token;
                    $index += 1 + $consumed;
                    continue;
                }
                // Backslash + anything else is just that character.
                $literals[] = $escaped;
                $index += 2;
                continue;
            }

            [$rangeEnd, $rangeLength] = self::readRangeEnd($characters, $index);
            // Compare code points rather than strings: string comparison applies
            // numeric-string rules and would reject a single-character range like `a-a`.
            if ($rangeEnd !== null && mb_ord($rangeEnd) >= mb_ord($character)) {
                $maxCode = mb_ord($rangeEnd);
                for ($code = mb_ord($character); $code <= $maxCode; $code++) {
                    $member = mb_chr($code);
                    if ($member === false) {
                        // Code points without a valid encoding (e.g. surrogates).
                        continue;
                    }
                    foreach ($callback($member) as $added) {
                        $literals[] = $added;
                    }
                }
                $index += $rangeLength;
                continue;
            }

            foreach ($callback($character) as $added) {
                $literals[] = $added;
            }
            $index++;
        }

        // Case conversion can yield an empty string or several characters
        // (mb_strtoupper('ß') returns 'SS'); neither is a valid class member.
        $literals = array_unique(array_filter(
            $literals,
            static fn (string $candidate): bool => mb_strlen($candidate) === 1
        ));
        // SORT_STRING orders UTF-8 strings by code point and avoids numeric-string comparison.
        sort($literals, SORT_STRING);

        $escapes = array_unique($escapes);
        sort($escapes, SORT_STRING);

        return [...$literals, ...$escapes];
    }

    /**
     * Reads the upper bound of a range starting at $offset, if there is one.
     *
     * Recognises `x-y` and `x-\y` where `\y` is an escaped non-alphanumeric
     * character. Anything else (no hyphen, hyphen at the end, hyphen followed by a
     * class escape such as `\d`) is reported as "not a range".
     *
     * @param list<string> $characters
     * @return array{0: ?string, 1: int} the upper bound (null when not a range) and the
     *                                   number of characters the whole range spans
     */
    private static function readRangeEnd(array $characters, int $offset): array
    {
        if (($characters[$offset + 1] ?? null) !== '-' || !isset($characters[$offset + 2])) {
            return [null, 0];
        }

        $end = $characters[$offset + 2];
        if ($end !== '\\') {
            return [$end, 3];
        }

        $escaped = $characters[$offset + 3] ?? null;
        if ($escaped !== null && preg_match('/\A[A-Za-z0-9]\z/', $escaped) !== 1) {
            return [$escaped, 4];
        }

        return [null, 0];
    }

    /**
     * Reads one escape sequence whose first character after the backslash sits at $offset.
     *
     * Handles the braced forms (`\p{..}`, `\P{..}`, `\x{..}`, `\o{..}`, `\N{..}`),
     * single-letter properties (`\pL`), control characters (`\cX`), two-digit hex
     * (`\x41`) and up to three-digit octal (`\101`). Every other letter escape is
     * returned as backslash plus that single letter.
     *
     * @param list<string> $characters
     * @return array{0: string, 1: int} the token including its backslash, and the number
     *                                  of characters consumed after the backslash
     */
    private static function readEscapeSequence(array $characters, int $offset): array
    {
        $first = $characters[$offset];
        $token = '\\' . $first;
        $next = $characters[$offset + 1] ?? null;

        if ($next === '{' && in_array($first, ['p', 'P', 'x', 'o', 'N'], true)) {
            $closing = array_search('}', array_slice($characters, $offset + 2), true);
            if ($closing === false) {
                // Unterminated brace: keep only the two-character escape.
                return [$token, 1];
            }
            $braced = implode('', array_slice($characters, $offset + 1, $closing + 2));

            return [$token . $braced, $closing + 3];
        }

        if (($first === 'p' || $first === 'P') && $next !== null && preg_match('/\A[A-Za-z]\z/', $next) === 1) {
            return [$token . $next, 2];
        }

        if ($first === 'c' && $next !== null) {
            return [$token . $next, 2];
        }

        $digitPattern = match (true) {
            $first === 'x' => '/\A[0-9A-Fa-f]\z/',
            preg_match('/\A[0-7]\z/', $first) === 1 => '/\A[0-7]\z/',
            default => null,
        };
        if ($digitPattern === null) {
            return [$token, 1];
        }

        // `\x` and an octal lead digit may each be followed by up to two more digits.
        $digits = '';
        for ($step = 1; $step <= 2; $step++) {
            $candidate = $characters[$offset + $step] ?? '';
            if (preg_match($digitPattern, $candidate) !== 1) {
                break;
            }
            $digits .= $candidate;
        }

        return [$token . $digits, 1 + strlen($digits)];
    }

    /**
     * Returns `^` when the class is negated and an empty string otherwise.
     */
    private function negationPrefix(): string
    {
        return str_starts_with($this->part, '^') ? '^' : '';
    }

    /**
     * Returns an equivalent class that lists both cases of every member explicitly,
     * so it matches the same input without relying on a case-insensitive flag.
     */
    public function toCaseInsensitive(): RegexPartInterface
    {
        $included = $this->makeAllIncludedArray(
            static fn (string $chr): array => [$chr, mb_strtoupper($chr), mb_strtolower($chr)]
        );

        return new AnyMatch(
            $this->negationPrefix() . self::createRange($included)
        );
    }

    /**
     * Returns the class rewritten so that an unescaped `.` member also lists `\n` and `\r`.
     *
     * NOTE(review): inside a character class PCRE treats `.` as a literal dot, so the
     * dot-all flag does not normally change what a class matches. The expansion is kept
     * as it was; confirm whether it is intended.
     */
    public function toDotAll(): RegexPartInterface
    {
        $included = $this->makeAllIncludedArray(
            static fn (string $chr): array => $chr === '.' ? ['.', "\n", "\r"] : [$chr]
        );

        return new AnyMatch(
            $this->negationPrefix() . self::createRange($included)
        );
    }

    /**
     * Collapses a sorted list of class members into compact class syntax.
     *
     * Consecutive code points are merged into runs and rendered by renderRange().
     * Entries longer than one character are escape tokens (`\d`, `\p{L}`, ...) and are
     * emitted verbatim, because quoting them would destroy their meaning.
     *
     * @param list<string> $included
     */
    private static function createRange(array $included): string
    {
        $result = '';
        $runStart = null;
        // Code point that would extend the currently open run by one.
        $expectedNext = 0;

        foreach ($included as $character) {
            if (mb_strlen($character) > 1) {
                if ($runStart !== null) {
                    $result .= self::renderRange($runStart, $expectedNext);
                    $runStart = null;
                }
                $result .= $character;
                continue;
            }

            $code = mb_ord($character);
            if ($runStart !== null && $code === $expectedNext) {
                $expectedNext++;
                continue;
            }

            if ($runStart !== null) {
                $result .= self::renderRange($runStart, $expectedNext);
            }
            $runStart = $character;
            $expectedNext = $code + 1;
        }

        if ($runStart !== null) {
            $result .= self::renderRange($runStart, $expectedNext);
        }

        return $result;
    }

    /**
     * Renders one run of consecutive code points.
     *
     * A run of one is the quoted character, a run of two is both characters side by
     * side, and anything longer becomes `first-last`.
     *
     * @param string $startRange   first character of the run
     * @param int    $currentRange code point one past the last character of the run
     */
    private static function renderRange(string $startRange, int $currentRange): string
    {
        $last = mb_chr($currentRange - 1);
        if ($last === $startRange) {
            return preg_quote($startRange);
        }

        if ($currentRange - 1 === mb_ord($startRange) + 1) {
            return preg_quote($startRange) . preg_quote($last);
        }

        return preg_quote($startRange) . '-' . preg_quote($last);
    }

    /**
     * A character class contains no start or end anchors, so it is returned unchanged.
     */
    public function removeStartAndEndMarkers(): ?RegexPartInterface
    {
        return $this;
    }
}