Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.83% |
23 / 24 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
HtmlWordCounter | |
95.83% |
23 / 24 |
|
66.67% |
2 / 3 |
8 | |
0.00% |
0 / 1 |
__construct | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
isSupported | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
extractAttributeValues | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
countFromString | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | namespace Apie\CountWords\Strategies; |
3 | |
4 | use Apie\CountWords\Strategies\Concerns\UseResourceForFile; |
5 | use Apie\CountWords\Strategies\Concerns\UseStringForResource; |
6 | use Apie\CountWords\WordCounter; |
7 | use DOMDocument; |
8 | use DOMElement; |
9 | use DOMNodeList; |
10 | use DOMXPath; |
11 | |
/**
 * Word counting strategy for HTML-like documents (HTML, XHTML, SVG).
 *
 * Words are gathered from two places:
 *  - the values of the `alt`, `title` and `label` attributes, read through a DOM parse;
 *  - the visible text that remains once all markup has been stripped.
 *
 * The class only exposes static methods; the constructor is private.
 */
final class HtmlWordCounter implements WordCounterInterface
{
    use UseStringForResource;
    use UseResourceForFile;

    /**
     * Tags that do not act as a word boundary: text on both sides of such a tag is
     * glued together ("foo<b>bar</b>" is counted as "foobar"). Every tag not listed
     * here is replaced by whitespace before counting.
     */
    private const INLINE_ELEMENTS = [
        'a',
        'abbr',
        'acronym',
        'b',
        'bdo',
        'big',
        'button',
        'cite',
        'code',
        'dfn',
        'em',
        'i',
        'img',
        'input',
        'kbd',
        'label',
        'map',
        'object',
        'output',
        'q',
        'samp',
        'script',
        'select',
        'small',
        'span',
        'strong',
        'sub',
        'sup',
        'textarea',
        'time',
        'tt',
        'var',
    ];

    /**
     * Attributes whose values are counted as words in addition to the text content.
     */
    private const TEXT_ATTRIBUTES = ['alt', 'title', 'label'];

    /**
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Tells whether this strategy handles the given file extension or mime type.
     *
     * Either argument may be null; a match on one of them is sufficient.
     */
    public static function isSupported(?string $fileExtension, ?string $mimeType): bool
    {
        // Strict comparison: both arguments are already typed ?string, so no juggling is wanted.
        return in_array($fileExtension, ['html', 'xhtml', 'htm', 'svg'], true)
            || in_array($mimeType, ['text/html', 'application/xhtml+xml', 'image/svg+xml'], true);
    }

    /**
     * Reads one attribute from every element in a node list.
     *
     * @param DOMNodeList<DOMElement>|false $nodes result of DOMXPath::query(); false yields an empty list
     * @return array<int, string>
     */
    private static function extractAttributeValues(DOMNodeList|false $nodes, string $attribute): array
    {
        if ($nodes === false) {
            return [];
        }
        $values = [];
        foreach ($nodes as $node) {
            // Only elements carry attributes; skip anything else a query might return.
            if ($node instanceof DOMElement) {
                $values[] = $node->getAttribute($attribute);
            }
        }
        return $values;
    }

    /**
     * Parses the markup and returns the values of all TEXT_ATTRIBUTES found in it.
     *
     * @return array<int, string>
     */
    private static function collectAttributeTexts(string $html): array
    {
        // DOMDocument::loadHTML() rejects an empty string on PHP 8 (ValueError),
        // and an empty document has no attributes to offer anyway.
        if ($html === '') {
            return [];
        }

        $dom = new DOMDocument();
        // Parse errors of sloppy real-world HTML are collected silently and discarded.
        // The previous setting is restored afterwards because it is process-global.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            // NOTE(review): libxml may assume a non-UTF-8 charset when the document does not
            // declare one, which would garble non-ASCII attribute values - confirm and decide
            // whether an explicit encoding hint is wanted here.
            $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        $xpath = new DOMXPath($dom);
        $texts = [];
        foreach (self::TEXT_ATTRIBUTES as $attribute) {
            $texts[] = self::extractAttributeValues($xpath->query("//*[@{$attribute}]"), $attribute);
        }

        return array_merge(...$texts);
    }

    /**
     * Counts the words of an HTML string.
     *
     * Attribute texts (alt, title, label) are counted first, followed by the text content
     * of the document. Every piece of text is handed to WordCounter::countFromString()
     * together with the running $counts, and the final result of that chain is returned.
     *
     * @param string $text   raw HTML markup
     * @param array  $counts counts accumulated so far; passed through WordCounter
     * @return array the updated counts
     */
    public static function countFromString(string $text, array $counts = []): array
    {
        foreach (self::collectAttributeTexts($text) as $attributeText) {
            $counts = WordCounter::countFromString($attributeText, $counts);
        }

        // Pass 1: pad every tag with spaces, then drop all tags except the inline ones.
        // The padding left behind by removed (block-level) tags keeps neighbouring words apart.
        $padded = str_replace(['<', '>'], [' <', '> '], $text);
        $inlineOnly = strip_tags($padded, self::INLINE_ELEMENTS);

        // Pass 2: undo the padding around the surviving inline tags so that removing them
        // does not split a word, strip them, and decode entities into real characters.
        $unpadded = str_replace([' <', '> '], ['<', '>'], $inlineOnly);
        $plainText = html_entity_decode(strip_tags($unpadded));

        return WordCounter::countFromString($plainText, $counts);
    }
}