Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
95.83% |
23 / 24 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
| HtmlWordCounter | |
95.83% |
23 / 24 |
|
66.67% |
2 / 3 |
8 | |
0.00% |
0 / 1 |
| __construct | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| isSupported | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| extractAttributeValues | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
| countFromString | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | namespace Apie\CountWords\Strategies; |
| 3 | |
| 4 | use Apie\CountWords\Strategies\Concerns\UseResourceForFile; |
| 5 | use Apie\CountWords\Strategies\Concerns\UseStringForResource; |
| 6 | use Apie\CountWords\WordCounter; |
| 7 | use DOMDocument; |
| 8 | use DOMElement; |
| 9 | use DOMNodeList; |
| 10 | use DOMXPath; |
| 11 | |
/**
 * Word counter for HTML-like documents (HTML, XHTML and SVG).
 *
 * Words are taken from two places: the values of the alt, title and label
 * attributes, and the text that is left once all markup has been stripped.
 * The actual counting is delegated to WordCounter::countFromString().
 */
final class HtmlWordCounter implements WordCounterInterface
{
    use UseStringForResource;
    use UseResourceForFile;

    /**
     * Tags that survive the first strip_tags() pass in countFromString().
     * They are removed in a second pass, after the padding around them has
     * been taken out again, so removing them does not introduce whitespace.
     */
    private const INLINE_ELEMENTS = [
        'a',
        'abbr',
        'acronym',
        'b',
        'bdo',
        'big',
        'button',
        'cite',
        'code',
        'dfn',
        'em',
        'i',
        'img',
        'input',
        'kbd',
        'label',
        'map',
        'object',
        'output',
        'q',
        'samp',
        'script',
        'select',
        'small',
        'span',
        'strong',
        'sub',
        'sup',
        'textarea',
        'time',
        'tt',
        'var',
    ];

    /**
     * Attributes whose values are counted in addition to the visible text.
     * The order determines the order in which the values are counted.
     */
    private const TEXT_ATTRIBUTES = ['alt', 'title', 'label'];

    /**
     * The class only exposes static methods and is never instantiated.
     *
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Tells whether this strategy handles the given file extension or MIME type.
     *
     * @param string|null $fileExtension extension without the leading dot, if known
     * @param string|null $mimeType      MIME type, if known
     * @return bool true when either argument matches a supported value
     */
    public static function isSupported(?string $fileExtension, ?string $mimeType): bool
    {
        // Strict comparison: null must never match through type juggling.
        return in_array($fileExtension, ['html', 'xhtml', 'htm', 'svg'], true)
            || in_array($mimeType, ['text/html', 'application/xhtml+xml', 'image/svg+xml'], true);
    }

    /**
     * Reads one attribute from every node in the list.
     *
     * @param DOMNodeList<DOMElement>|false $nodes result of a DOMXPath::query() call
     * @param string $attribute name of the attribute to read
     * @return array<int, string> attribute values in node order; empty when $nodes is false or empty
     */
    private static function extractAttributeValues(DOMNodeList|false $nodes, string $attribute): array
    {
        if (!$nodes) {
            return [];
        }
        $values = [];
        foreach ($nodes as $node) {
            $values[] = (string) $node->getAttribute($attribute);
        }
        return $values;
    }

    /**
     * Parses the markup and collects the values of all TEXT_ATTRIBUTES.
     *
     * @param string $html markup to parse
     * @return array<int, string> all alt values, then all title values, then all label values
     */
    private static function extractAttributeTexts(string $html): array
    {
        // loadHTML() refuses an empty source string (a ValueError on PHP 8),
        // and an empty document has no attributes to report anyway.
        if ($html === '') {
            return [];
        }

        // Collect parse errors internally instead of emitting warnings, and
        // remember the previous setting so it can be put back afterwards.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $dom = new DOMDocument();
            // NOTE(review): loadHTML() is commonly reported to assume ISO-8859-1
            // when the document declares no charset; confirm how non-ASCII
            // attribute values in UTF-8 input come out.
            $dom->loadHTML($html);
            $xpath = new DOMXPath($dom);

            $valuesPerAttribute = [];
            foreach (self::TEXT_ATTRIBUTES as $attribute) {
                $valuesPerAttribute[] = self::extractAttributeValues(
                    $xpath->query('//*[@' . $attribute . ']'),
                    $attribute
                );
            }

            return array_merge(...$valuesPerAttribute);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Counts the words found in the attribute values and in the text content
     * of the given markup.
     *
     * @param string $text   the HTML/XHTML/SVG markup
     * @param array  $counts counts gathered so far; passed through to WordCounter
     * @return array the counts as returned by WordCounter::countFromString()
     */
    public static function countFromString(string $text, array $counts = []): array
    {
        foreach (self::extractAttributeTexts($text) as $attributeText) {
            $counts = WordCounter::countFromString($attributeText, $counts);
        }

        // Pass 1: pad every angle bracket with a space and strip all tags except
        // the inline ones; the padding left behind keeps the text of adjacent
        // non-inline elements apart.
        $text = strip_tags(str_replace(['<', '>'], [' <', '> '], $text), self::INLINE_ELEMENTS);
        // Pass 2: take the padding out again around the remaining inline tags,
        // strip them too, and turn entities into the characters they stand for.
        $text = html_entity_decode(strip_tags(str_replace([' <', '> '], ['<', '>'], $text)));

        return WordCounter::countFromString($text, $counts);
    }
}