Commit 6db26e1e authored by shajiaiming's avatar shajiaiming

BOC jpy ticker

parent 5e778b6b
......@@ -37,7 +37,8 @@ class ExchangeBusiness
8 => 'Zg',
9 => 'Go',
10 => 'Zhaobi',
11 => 'Gdpro'
11 => 'Gdpro',
12 => 'Boc'
];
/**
......
......@@ -9,11 +9,12 @@
namespace common\service\exchange;
use linslin\yii2\curl\Curl;
use voku\helper\HtmlDomParser;
class Boc extends Exchange implements ExchangeInterface
{
protected $supported_symbol = 'supported_symbol_boc';
protected $quotation_prefix = 'boc';
protected $quotation_prefix = 'quotation_boc_';
protected $base_url = 'http://srh.bankofchina.com/search/whpj/search.jsp';
public function symbolExists($tag = 'CNY', $aim = "JPY")
......@@ -61,13 +62,16 @@ class Boc extends Exchange implements ExchangeInterface
'pjname' => 1323
])->post('http://srh.bankofchina.com/search/whpj/search.jsp');
var_dump($response);exit;
if (is_array($content) && isset($content['data']) && 200 == $content['code']) {
$data = $content['data'];
$key = $this->quotation_prefix . 'CNY_JPY';
$this->redis->hmset($key, 'low', $data['low'], 'high', $data['high'], 'last', $data['last']);
$this->redis->sadd($this->supported_symbol, 'CNYJPY');
$response = iconv('UTF-8', 'GBK//TRANSLIT',$response);
$html = HtmlDomParser::str_get_html($response);
$div = ($html->find('div.BOC_main'))[0];
foreach ($div->find('td') as $key => $e){
if($key == 5){
$key = $this->quotation_prefix . 'CNY_JPY';
$this->redis->hmset($key, 'low', $e->innertext, 'high', $e->innertext, 'last', $e->innertext);
$this->redis->sadd($this->supported_symbol, 'CNYJPY');
break;
}
}
}
......
......@@ -21,7 +21,8 @@
"kartik-v/yii2-widget-fileinput": "*",
"yiisoft/yii2-redis": "~2.0.0",
"yiisoft/yii2-queue": "~2.0",
"linslin/yii2-curl": "*"
"linslin/yii2-curl": "*",
"voku/simple_html_dom": "^4.5"
},
"require-dev": {
"yiisoft/yii2-debug": "~2.0.0",
......
......@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "4c1d96df90fb4ff9a976d6268b520242",
"content-hash": "5ba1a32b2897f378910c9c125cb6e133",
"packages": [
{
"name": "bower-asset/bootstrap",
......@@ -1813,6 +1813,59 @@
"time": "2018-07-13T07:04:35+00:00"
},
{
"name": "symfony/css-selector",
"version": "v4.1.1",
"source": {
"type": "git",
"url": "https://github.com/symfony/css-selector.git",
"reference": "03ac71606ecb0b0ce792faa17d74cc32c2949ef4"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/css-selector/zipball/03ac71606ecb0b0ce792faa17d74cc32c2949ef4",
"reference": "03ac71606ecb0b0ce792faa17d74cc32c2949ef4",
"shasum": ""
},
"require": {
"php": "^7.1.3"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.1-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\CssSelector\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Jean-François Simon",
"email": "jeanfrancois.simon@sensiolabs.com"
},
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony CssSelector Component",
"homepage": "https://symfony.com",
"time": "2018-05-30T07:26:09+00:00"
},
{
"name": "symfony/process",
"version": "v4.1.1",
"source": {
......@@ -1862,6 +1915,65 @@
"time": "2018-05-31T10:17:53+00:00"
},
{
"name": "voku/simple_html_dom",
"version": "4.5.3",
"source": {
"type": "git",
"url": "https://github.com/voku/simple_html_dom.git",
"reference": "ebc63c36dc75a350c390b51f0fc134daa469adda"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/voku/simple_html_dom/zipball/ebc63c36dc75a350c390b51f0fc134daa469adda",
"reference": "ebc63c36dc75a350c390b51f0fc134daa469adda",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-libxml": "*",
"ext-simplexml": "*",
"php": ">=7.0.0",
"symfony/css-selector": "~3.0|~4.0"
},
"require-dev": {
"phpunit/phpunit": "~6.0 || ~7.0"
},
"suggest": {
"voku/portable-utf8": "~4.0"
},
"type": "library",
"autoload": {
"psr-4": {
"voku\\helper\\": "src/voku/helper/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Lars Moelleken",
"homepage": "http://www.moelleken.org/",
"role": "Developer"
},
{
"name": "dimabdc",
"email": "support@titor.ru",
"homepage": "http://github.com/dimabdc",
"role": "Developer"
}
],
"description": "Simple HTML DOM package.",
"homepage": "http://simplehtmldom.sourceforge.net/",
"keywords": [
"HTML Parser",
"dom",
"php dom"
],
"time": "2019-05-20T07:56:13+00:00"
},
{
"name": "yiisoft/yii2",
"version": "2.0.15.1",
"source": {
......@@ -3495,6 +3607,7 @@
"mock",
"xunit"
],
"abandoned": true,
"time": "2018-05-29T13:54:20+00:00"
},
{
......@@ -4236,59 +4349,6 @@
"time": "2018-05-31T10:17:53+00:00"
},
{
"name": "symfony/css-selector",
"version": "v4.1.1",
"source": {
"type": "git",
"url": "https://github.com/symfony/css-selector.git",
"reference": "03ac71606ecb0b0ce792faa17d74cc32c2949ef4"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/css-selector/zipball/03ac71606ecb0b0ce792faa17d74cc32c2949ef4",
"reference": "03ac71606ecb0b0ce792faa17d74cc32c2949ef4",
"shasum": ""
},
"require": {
"php": "^7.1.3"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.1-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\CssSelector\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Jean-François Simon",
"email": "jeanfrancois.simon@sensiolabs.com"
},
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony CssSelector Component",
"homepage": "https://symfony.com",
"time": "2018-05-30T07:26:09+00:00"
},
{
"name": "symfony/dom-crawler",
"version": "v4.1.1",
"source": {
......
......@@ -24,6 +24,7 @@ return array(
'yii\\composer\\' => array($vendorDir . '/yiisoft/yii2-composer'),
'yii\\bootstrap\\' => array($vendorDir . '/yiisoft/yii2-bootstrap/src'),
'yii\\' => array($vendorDir . '/yiisoft/yii2'),
'voku\\helper\\' => array($vendorDir . '/voku/simple_html_dom/src/voku/helper'),
'phpDocumentor\\Reflection\\' => array($vendorDir . '/phpdocumentor/reflection-common/src', $vendorDir . '/phpdocumentor/reflection-docblock/src', $vendorDir . '/phpdocumentor/type-resolver/src'),
'linslin\\yii2\\curl\\' => array($vendorDir . '/linslin/yii2-curl'),
'kartik\\widgets\\' => array($vendorDir . '/kartik-v/yii2-widgets'),
......
......@@ -38,6 +38,10 @@ class ComposerStaticInit33057934f3e7eaaa1ce2d53797277936
'yii\\bootstrap\\' => 14,
'yii\\' => 4,
),
'v' =>
array (
'voku\\helper\\' => 12,
),
'p' =>
array (
'phpDocumentor\\Reflection\\' => 25,
......@@ -201,6 +205,10 @@ class ComposerStaticInit33057934f3e7eaaa1ce2d53797277936
array (
0 => __DIR__ . '/..' . '/yiisoft/yii2',
),
'voku\\helper\\' =>
array (
0 => __DIR__ . '/..' . '/voku/simple_html_dom/src/voku/helper',
),
'phpDocumentor\\Reflection\\' =>
array (
0 => __DIR__ . '/..' . '/phpdocumentor/reflection-common/src',
......
......@@ -4434,6 +4434,67 @@
"description": "A small library for converting tokenized PHP source code into XML and potentially other formats"
},
{
"name": "voku/simple_html_dom",
"version": "4.5.3",
"version_normalized": "4.5.3.0",
"source": {
"type": "git",
"url": "https://github.com/voku/simple_html_dom.git",
"reference": "ebc63c36dc75a350c390b51f0fc134daa469adda"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/voku/simple_html_dom/zipball/ebc63c36dc75a350c390b51f0fc134daa469adda",
"reference": "ebc63c36dc75a350c390b51f0fc134daa469adda",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-libxml": "*",
"ext-simplexml": "*",
"php": ">=7.0.0",
"symfony/css-selector": "~3.0|~4.0"
},
"require-dev": {
"phpunit/phpunit": "~6.0 || ~7.0"
},
"suggest": {
"voku/portable-utf8": "~4.0"
},
"time": "2019-05-20T07:56:13+00:00",
"type": "library",
"installation-source": "dist",
"autoload": {
"psr-4": {
"voku\\helper\\": "src/voku/helper/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Lars Moelleken",
"homepage": "http://www.moelleken.org/",
"role": "Developer"
},
{
"name": "dimabdc",
"email": "support@titor.ru",
"homepage": "http://github.com/dimabdc",
"role": "Developer"
}
],
"description": "Simple HTML DOM package.",
"homepage": "http://simplehtmldom.sourceforge.net/",
"keywords": [
"HTML Parser",
"dom",
"php dom"
]
},
{
"name": "webmozart/assert",
"version": "1.3.0",
"version_normalized": "1.3.0.0",
......
[PHP Simple HTML Dom v4.5.x]
1: fix -> return types
2: add abstract class and interface for "Dom Elements" (SimpleHtmlDom*)
3: add abstract class and interface for "Dom Nodes" (SimpleHtmlDomNode*)
4: fix -> errors reported by phpstan (level 7)
[PHP Simple HTML Dom v4.4.x]
1: add "findMulti()" method for "SimpleDomParser"
2: fix -> phpdoc improvements via phpstan
[PHP Simple HTML Dom v4.3.x]
1: add "isRemoved()" method for "SimpleHtmlDom"
2: fix -> do not remove newlines from the output
3: fix -> keep HTML closing tags in <script> tags
[PHP Simple HTML Dom v4.2.x]
1: add "val()" method for form elements
2: add simple access to DOMElement via "SimpleHtmlDom"
3: fix -> for special script tags with type="text/html"
[PHP Simple HTML Dom v4.1.x]
1: "HtmlDomParser" -> fix clone method for "document"
2: add "findOne($selector)" === "find($selector, 0)"
3: update "symfony/css-selector" (optional)
4: use LIBXML options for every html-loading task
5: fix -> for vuejs (attributes beginning with "@")
6: fix -> plaintext output
7: fix -> document.write issue from DomDocument
8: fix -> remove (auto-added) head element
[PHP Simple HTML Dom v4.0.x]
1: drop support for PHP < 7.0
2: use "strict_types"
3: "Portable UTF-8" is now optional
[PHP Simple HTML Dom v3.1.x]
1: optimize performance (use the "UTF8"-Class only if needed)
2: fix html-handling of "meta"-tags [tags in the <head>-tag]
[PHP Simple HTML Dom v3.0.x]
1: use output from "SimpleHtmlDomNode" as array instead of string
[PHP Simple HTML Dom v2.0.x]
1: Complete Re-Write (based on https://github.com/dimabdc/PHP-Fast-Simple-HTML-DOM-Parser)
2: bug-fixing / performance improvements
[PHP Simple HTML Dom v1.7.x]
1: removed old parameter: maxLen / lowercase / stripRN / defaultBRText / defaultSpanText
2: add good default settings
3: removed charset-parsing (use UTF-8)
[PHP Simple HTML Dom v1.6.x]
1: fixed code-style
2: removed debugging
3: use Composer and PSR-0
4: added UTF-8 Support (need some testing)
[PHP Simple HTML Dom version 1.5 released.]
1: Memory leak fixed!
2: Added support for detecting the source html character set. This is used to convert characters when plaintext is requested.
3: Other little fixes and features, too numerous to categorize.
4: add ability to search the "noise" array
[PHP Simple HTML DOM Parser v1.11 is released]
1. Supports xpath generated from Firebug.
2. New method "dump" of "simple_html_dom_node".
3. New attribute "xmltext" of "simple_html_dom_node".
4. remove preg_quote on selector match function: [attribute*=value];
5. Element "Comment" will treat as children.
6. Fixed the problem with <pre>.
7. Fixed bug #2207477 (does not load some pages properly).
8. Fixed bug #2315853 (Error with character after < sign).
[PHP Simple HTML DOM Parser v1.10 is released]
1. Negative indexes supports of "find" method, thanks for Vadim Voituk.
2. Constructor with automatically load contents either text or file/url, thanks for Antcs.
3. Fully supports wildcard in selectors.
4. Fixed bug of confusing by the < symbol inside the text.
5. Fixed bug of dash in selectors.
6. Fixed bug of <nobr>.
7. Fixed bug #2155883 (Nested List Parses Incorrectly).
8. Fixed bug #2155113 (error with unclosed html tags).
[PHP Simple HTML DOM Parser v1.00 is released]
1. New method "getAllAttributes" of "simple_html_dom_node".
2. Fix the bug of selector in some critical conditions.
3. Fix the bug of stripping php tags.
4. Fix the bug of remove_noise().
5. Fix the bug of noise in attributes.
6. Supports full javascript string in selector: $e->find("a[onclick=alert('hello')]").
7. Change selector "*=" to case-insensitive.
[PHP Simple HTML DOM Parser v0.99 is released]
1. Performance tuning (boost 10%).
2. Memory requirement reduce 25%.
3. Change function name from "file_get_dom()" to "file_get_html()".
4. Change function name from "str_get_dom()" to "str_get_html()".
5. Fixed bug #2011286 (Error with unclosed html tags).
6. Fixed bug #2012551 (Error parsing divs).
7. Fixed bug #2020924 (Error for missed tag.).
8. Fixed bug (problem with <body> tag's innertext).
[PHP Simple HTML DOM Parser v0.98 is released]
1. Performance tuning (boost 20%).
2. Supports "multiple class" selector feature: <div class="a b c"></div>.
3. New "callback function" feature.
4. New "multiple selectors" feature: $dom->find('p,a,b');
5. New examples.
6. Supports extract contents from HTML features: $dom->plaintext;
7. Fix the bug of $dom->clear().
8. Fix the bug of text nodes' innertext.
9. Fix the bug of comment nodes' innertext.
10. Fix the bug of descendant selector with optional tags.
11. Change simple_html_dom_node method name from "text()" to "makeup()".
[PHP Simple HTML DOM Parser v0.97 is released]
1. Important!! file and class name changed (html_dom_parser->simple_html_dom)!
2. Important!! ($dom->save_file) will not support anymore.
3. New node type "comment" (eg. $dom->find('comment')).
4. Add self-closing tags: 'base', 'spacer'.
5. Fix the bug of outertext (th).
6. Fix the bug of regular expression escaping chars ($dom->find).
7. Fix the bug while line-breaker and "\t" in tags.
8. Remove example "example_customize_parser.php".
9. New example "simple_html_dom_utility.php".
[PHP Simple HTML DOM Parser v0.96 is released]
1. (Request #1936000) New DOM operations(first_child, last_child, next_sibling, previous_sibling).
2. New method to remove attribute.
3. Add the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy).
4. Add traverse section in manual.
5. Now file_get_dom supports full file_get_contents parameters.
6. Fix the bug of self-closing tags in the end of file.
7. Fix the bug of blanks in the end of tag.
8. Add Reference section in manual.
#. Fix some typo of testcase.
[PHP Simple HTML DOM Parser v0.95 is released]
1. New attribute filters (Thanks to Yousuke Kumakura).
2. Fix the bug of optional-closing tags.
3. Fix the bug of parsing the line break next to the tag's name.
4. Supports tag name with namespace.
#. Refine structure of testcase.
[PHP Simple HTML DOM Parser v0.94 is released]
1. Stop infinite loop when the source content is BAD HTML.
2. Fix the bug of adding new attributes to self closing tags.
3. Fix the bug of customize parser without $dom->remove_noise();
4. Add FAQ section in manual.
The MIT License (MIT)
Copyright (c) 2016
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
[![Build Status](https://travis-ci.org/voku/simple_html_dom.svg?branch=master)](https://travis-ci.org/voku/simple_html_dom)
[![Coverage Status](https://coveralls.io/repos/github/voku/simple_html_dom/badge.svg?branch=master)](https://coveralls.io/github/voku/simple_html_dom?branch=master)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/3290fdc35c8f49ad9abdf053582466eb)](https://www.codacy.com/app/voku/simple_html_dom?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=voku/simple_html_dom&amp;utm_campaign=Badge_Grade)
[![Latest Stable Version](https://poser.pugx.org/voku/simple_html_dom/v/stable)](https://packagist.org/packages/voku/simple_html_dom)
[![Total Downloads](https://poser.pugx.org/voku/simple_html_dom/downloads)](https://packagist.org/packages/voku/simple_html_dom)
[![License](https://poser.pugx.org/voku/simple_html_dom/license)](https://packagist.org/packages/voku/simple_html_dom)
[![Donate to this project using Paypal](https://img.shields.io/badge/paypal-donate-yellow.svg)](https://www.paypal.me/moelleken)
[![Donate to this project using Patreon](https://img.shields.io/badge/patreon-donate-yellow.svg)](https://www.patreon.com/voku)
# :scroll: Simple Html Dom Parser for PHP
An HTML DOM parser written in PHP - lets you manipulate HTML in a very easy way!
This is a fork of [PHP Simple HTML DOM Parser project](http://simplehtmldom.sourceforge.net/) but instead of string manipulation we use DOMDocument and modern php classes like "Symfony CssSelector".
- PHP 7.0+ Support
- PHP-FIG Standard
- Composer & PSR-4 support
- PHPUnit testing via Travis CI
- PHP-Quality testing via SensioLabsInsight
- UTF-8 Support (more support via "voku/portable-utf8")
- Invalid HTML Support (partly ...)
- Find tags on an HTML page with selectors just like jQuery
- Extract contents from HTML in a single line
### Install via "composer require"
```shell
composer require voku/simple_html_dom
composer require voku/portable-utf8 # if you need e.g. UTF-8 fixed output
```
### Quick Start
```php
use voku\helper\HtmlDomParser;
require_once 'composer/autoload.php';
...
$dom = HtmlDomParser::str_get_html($str);
// or
$dom = HtmlDomParser::file_get_html($file);
$element = $dom->findOne('#css-selector'); // "$element" === instance of "SimpleHtmlDom"
$elements = $dom->findMulti('.css-selector'); // "$elements" === instance of SimpleHtmlDomNodeInterface<int, SimpleHtmlDom>
...
```
### Examples
[github.com/voku/simple_html_dom/tree/master/example](https://github.com/voku/simple_html_dom/tree/master/example)
### Support
For support and donations please visit [Github](https://github.com/voku/simple_html_dom/) | [Issues](https://github.com/voku/simple_html_dom/issues) | [PayPal](https://paypal.me/moelleken) | [Patreon](https://www.patreon.com/voku).
For status updates and release announcements please visit [Releases](https://github.com/voku/simple_html_dom/releases) | [Twitter](https://twitter.com/suckup_de) | [Patreon](https://www.patreon.com/voku/posts).
For professional support please contact [me](https://about.me/voku).
### Thanks
- Thanks to [GitHub](https://github.com) (Microsoft) for hosting the code and a good infrastructure including issue management, etc.
- Thanks to [IntelliJ](https://www.jetbrains.com) as they make the best IDEs for PHP and they gave me an open source license for PhpStorm!
- Thanks to [Travis CI](https://travis-ci.com/) for being the most awesome, easiest continuous integration tool out there!
- Thanks to [StyleCI](https://styleci.io/) for the simple but powerful code style check.
- Thanks to [PHPStan](https://github.com/phpstan/phpstan) && [Psalm](https://github.com/vimeo/psalm) for really great static analysis tools and for discovering bugs in the code!
### License
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fvoku%2Fsimple_html_dom.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fvoku%2Fsimple_html_dom?ref=badge_large)
{
"name": "voku/simple_html_dom",
"description": "Simple HTML DOM package.",
"keywords": [
"dom",
"php dom",
"HTML parser"
],
"homepage": "http://simplehtmldom.sourceforge.net/",
"license": "MIT",
"type": "library",
"authors": [
{
"name": "dimabdc",
"email": "support@titor.ru",
"homepage": "http://github.com/dimabdc",
"role": "Developer"
},
{
"name": "Lars Moelleken",
"homepage": "http://www.moelleken.org/",
"role": "Developer"
}
],
"require": {
"php": ">=7.0.0",
"symfony/css-selector": "~3.0|~4.0",
"ext-dom": "*",
"ext-libxml": "*",
"ext-simplexml": "*"
},
"suggest": {
"voku/portable-utf8": "~4.0"
},
"require-dev": {
"phpunit/phpunit": "~6.0 || ~7.0"
},
"autoload": {
"psr-4": {
"voku\\helper\\": "src/voku/helper/"
}
}
}
parameters:
reportUnmatchedIgnoredErrors: false
excludes_analyse:
- %rootDir%/vendor/*
- %rootDir%/tests/*
autoload_files:
- %rootDir%/vendor/autoload.php
ignoreErrors:
- '#Parameter \#1 \$var of function count expects array\|Countable, DOMNodeList given#'
- '#on an unknown class voku\\helper\\UTF8#'
- '#function of function call_user_func_array expects callable#'
- '#Method voku\\helper\\HtmlDomParser::findOne\(\) should return#'
- '#Method voku\\helper\\HtmlDomParser::findMulti\(\) should return#'
- '#Method voku\\helper\\SimpleHtmlDom::findOne\(\) should return#'
- '#Method voku\\helper\\SimpleHtmlDom::findMulti\(\) should return#'
- '#Method voku\\helper\\SimpleHtmlDomNode::findOne\(\) should return#'
- '#Method voku\\helper\\SimpleHtmlDomNode::findMulti\(\) should return#'
- '#@return with type array<voku\\helper\\SimpleHtmlDomInterface>\|voku\\helper\\SimpleHtmlDomNodeInterface#'
\ No newline at end of file
<?php
declare(strict_types=1);
namespace voku\helper;
/**
 * Magic-method layer shared by wrappers around a single DOM node.
 *
 * Property access such as `$element->innertext` and legacy snake_case method
 * names such as `$element->first_child()` are translated here into calls to
 * the abstract methods declared at the bottom of this class.
 */
abstract class AbstractSimpleHtmlDom
{
    /**
     * The wrapped DOM node. When it is null, the node-property lookups in the
     * magic accessors are skipped and the attribute methods are used instead.
     *
     * @var \DOMElement|\DOMNode|null
     */
    protected $node;

    /**
     * Lower-cased legacy method name => name of the method __call() dispatches to.
     *
     * @var array
     */
    protected static $functionAliases = [
        'children'     => 'childNodes',
        'first_child'  => 'firstChild',
        'last_child'   => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling',
        'parent'       => 'parentNode',
        'outertext'    => 'html',
        'outerhtml'    => 'html',
        'innertext'    => 'innerHtml',
        'innerhtml'    => 'innerHtml',
    ];

    /**
     * Dispatch a call to an inaccessible method through the alias table.
     *
     * The lookup is case-insensitive: the requested name is lower-cased first.
     *
     * @param string $name      method name as written by the caller
     * @param array  $arguments arguments forwarded unchanged to the aliased method
     *
     * @throws \BadMethodCallException when the lower-cased name is not a known alias
     *
     * @return SimpleHtmlDomInterface|string|null
     */
    public function __call($name, $arguments)
    {
        $name = \strtolower($name);

        if (isset(self::$functionAliases[$name])) {
            return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments);
        }

        // Include the requested name so the failing call site can be identified.
        throw new \BadMethodCallException('Method does not exist: ' . $name);
    }

    /**
     * Read a virtual property.
     *
     * Well-known names (matched case-insensitively) map to html(), innerHtml(),
     * text(), the node name, or all attributes. Any other name is first looked up
     * as a property of the wrapped node using the original spelling, and finally
     * as an attribute using the lower-cased spelling.
     *
     * @param string $name
     *
     * @return array|string|null
     */
    public function __get($name)
    {
        $nameOrig = $name;
        $name = \strtolower($name);

        switch ($name) {
            case 'outerhtml':
            case 'outertext':
            case 'html':
                return $this->html();
            case 'innerhtml':
            case 'innertext':
                return $this->innerHtml();
            case 'text':
            case 'plaintext':
                return $this->text();
            case 'tag':
                // no wrapped node => empty tag name
                return $this->node ? $this->node->nodeName : '';
            case 'attr':
                return $this->getAllAttributes();
            default:
                if ($this->node && \property_exists($this->node, $nameOrig)) {
                    return $this->node->{$nameOrig};
                }

                return $this->getAttribute($name);
        }
    }

    /**
     * Calling the object like a function is a shortcut for find().
     *
     * @param string $selector
     * @param int    $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function __invoke($selector, $idx = null)
    {
        return $this->find($selector, $idx);
    }

    /**
     * Report whether a virtual property is available.
     *
     * The content properties served by __get() always exist. Other names are
     * checked against the wrapped node's properties (original spelling) and then
     * against its attributes (lower-cased spelling).
     *
     * @param string $name
     *
     * @return bool
     */
    public function __isset($name)
    {
        $nameOrig = $name;
        $name = \strtolower($name);

        switch ($name) {
            case 'outertext':
            case 'outerhtml':
            case 'html': // served by __get(); without this case "$el->html ?? $default" always yields $default
            case 'innertext':
            case 'innerhtml':
            case 'plaintext':
            case 'text':
            case 'tag':
                return true;
            default:
                if ($this->node && \property_exists($this->node, $nameOrig)) {
                    return isset($this->node->{$nameOrig});
                }

                return $this->hasAttribute($name);
        }
    }

    /**
     * Write a virtual property.
     *
     * Outer/inner/plain text names replace the node, its children, or its text
     * with the given string. Other names are written to a matching property of
     * the wrapped node (original spelling) or, failing that, stored as an
     * attribute (lower-cased spelling).
     *
     * @param string $name
     * @param mixed  $value
     *
     * @return SimpleHtmlDomInterface|null
     */
    public function __set($name, $value)
    {
        $nameOrig = $name;
        $name = \strtolower($name);

        switch ($name) {
            case 'outerhtml':
            case 'outertext':
                return $this->replaceNodeWithString($value);
            case 'innertext':
            case 'innerhtml':
                return $this->replaceChildWithString($value);
            case 'plaintext':
                return $this->replaceTextWithString($value);
            default:
                if ($this->node && \property_exists($this->node, $nameOrig)) {
                    return $this->node->{$nameOrig} = $value;
                }

                return $this->setAttribute($name, $value);
        }
    }

    /**
     * String conversion yields the element's outer html.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->html();
    }

    /**
     * Unsetting a virtual property removes the attribute of that name.
     * The name is passed through as written (it is not lower-cased here).
     *
     * @param string $name
     *
     * @return void
     */
    public function __unset($name)
    {
        /** @noinspection UnusedFunctionResultInspection */
        $this->removeAttribute($name);
    }

    abstract public function setAttribute(string $name, $value = null, bool $strict = false): SimpleHtmlDomInterface;

    abstract protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface;

    abstract protected function replaceChildWithString(string $string): SimpleHtmlDomInterface;

    abstract protected function replaceTextWithString($string): SimpleHtmlDomInterface;

    abstract public function hasAttribute(string $name): bool;

    abstract public function find(string $selector, $idx = null);

    abstract public function getAttribute(string $name): string;

    abstract public function getAllAttributes();

    abstract public function text(): string;

    abstract public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string;

    abstract public function html(bool $multiDecodeNewHtmlEntity = false): string;

    abstract public function removeAttribute(string $name): SimpleHtmlDomInterface;
}
<?php
declare(strict_types=1);
namespace voku\helper;
/**
 * Collection of element wrappers (an \ArrayObject) with magic accessors that
 * fan a property read out to every contained element.
 */
abstract class AbstractSimpleHtmlDomNode extends \ArrayObject
{
    /** @noinspection MagicMethodsValidityInspection */

    /**
     * Read the same (lower-cased) property from every contained element.
     *
     * For a non-empty collection the result is the list of values gathered from
     * the entries that implement SimpleHtmlDomInterface; other entries are
     * ignored. For an empty collection "plaintext" and "outertext" yield an
     * empty array and every other name yields null.
     *
     * @param string $name
     *
     * @return array|null
     */
    public function __get($name)
    {
        $property = \strtolower($name);

        if ($this->count() === 0) {
            $isTextProperty = $property === 'plaintext' || $property === 'outertext';

            return $isTextProperty ? [] : null;
        }

        $values = [];
        foreach ($this as $element) {
            if (!$element instanceof SimpleHtmlDomInterface) {
                continue;
            }

            $values[] = $element->{$property};
        }

        return $values;
    }

    /**
     * Calling the collection like a function is a shortcut for find().
     *
     * @param string   $selector
     * @param int|null $idx
     *
     * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null
     */
    public function __invoke($selector, $idx = null)
    {
        $matches = $this->find($selector, $idx);

        return $matches;
    }

    /**
     * Concatenate the "outertext" of every entry, in iteration order.
     *
     * @return string
     */
    public function __toString()
    {
        $fragments = [];
        foreach ($this as $element) {
            $fragments[] = $element->outertext;
        }

        return \implode('', $fragments);
    }

    abstract public function find(string $selector, $idx = null);
}
<?php
declare(strict_types=1);
namespace voku\helper;
/**
* @property-read string $outerText
* <p>Get dom node's outer html (alias for "outerHtml").</p>
* @property-read string $outerHtml
* <p>Get dom node's outer html.</p>
* @property-read string $innerText
* <p>Get dom node's inner html (alias for "innerHtml").</p>
* @property-read string $innerHtml
* <p>Get dom node's inner html.</p>
* @property-read string $plaintext
* <p>Get dom node's plain text.</p>
*
* @method string outerText()
* <p>Get dom node's outer html (alias for "outerHtml()").</p>
* @method string outerHtml()
* <p>Get dom node's outer html.</p>
* @method string innerText()
* <p>Get dom node's inner html (alias for "innerHtml()").</p>
* @method HtmlDomParser load(string $html)
* <p>Load HTML from string.</p>
* @method HtmlDomParser load_file(string $html)
* <p>Load HTML from file.</p>
* @method static HtmlDomParser file_get_html($html, $libXMLExtraOptions = null)
* <p>Load HTML from file.</p>
* @method static HtmlDomParser str_get_html($html, $libXMLExtraOptions = null)
* <p>Load HTML from string.</p>
*/
class HtmlDomParser
{
/**
* @var array
*/
protected static $functionAliases = [
'outertext' => 'html',
'outerhtml' => 'html',
'innertext' => 'innerHtml',
'innerhtml' => 'innerHtml',
'load' => 'loadHtml',
'load_file' => 'loadHtmlFile',
];
/**
* @var string[][]
*/
protected static $domLinkReplaceHelper = [
'orig' => ['[', ']', '{', '}'],
'tmp' => [
'____SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT____',
'____SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT____',
'____SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT____',
'____SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT____',
],
];
/**
* @var array
*/
protected static $domReplaceHelper = [
'orig' => ['&', '|', '+', '%', '@'],
'tmp' => [
'____SIMPLE_HTML_DOM__VOKU__AMP____',
'____SIMPLE_HTML_DOM__VOKU__PIPE____',
'____SIMPLE_HTML_DOM__VOKU__PLUS____',
'____SIMPLE_HTML_DOM__VOKU__PERCENT____',
'____SIMPLE_HTML_DOM__VOKU__AT____',
],
];
/**
 * Tag name of a temporary wrapper element. The replacement table built in
 * putReplacedBackToPreserveHtmlEntities() pairs its opening and closing tag
 * with an empty string.
 *
 * @var string
 */
protected static $domHtmlWrapperHelper = '____simple_html_dom__voku__html_wrapper____';
/**
 * Temporary tag name standing in for special script tags. The replacement table
 * built in putReplacedBackToPreserveHtmlEntities() pairs it with "<script" and
 * "</script>". NOTE(review): the "sctipt" spelling is part of the runtime
 * value and is left untouched.
 *
 * @var string
 */
protected static $domHtmlSpecialScriptHelper = '____simple_html_dom__voku__html_special_sctipt____';
/**
* @var array
*/
protected static $domBrokenReplaceHelper = [];
/**
* @var callable
*/
protected static $callback;
/**
* @var \DOMDocument
*/
protected $document;
/**
* @var string
*/
protected $encoding = 'UTF-8';
/**
* @var bool
*/
protected $isDOMDocumentCreatedWithoutHtml = false;
/**
* @var bool
*/
protected $isDOMDocumentCreatedWithoutWrapper = false;
/**
* @var bool
*/
protected $isDOMDocumentCreatedWithoutHeadWrapper = false;
/**
* @var bool
*/
protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
/**
* @var bool
*/
protected $isDOMDocumentCreatedWithFakeEndScript = false;
/**
* @var bool
*/
protected $keepBrokenHtml;
/**
 * Create a parser, optionally pre-loaded with content.
 *
 * A wrapped element is unwrapped to its node first. A \DOMNode is deep-imported
 * into this parser's own document; any other non-null value is handed to
 * loadHtml(). With no argument the document stays empty.
 *
 * @param \DOMNode|SimpleHtmlDomInterface|string $element HTML code or SimpleHtmlDomInterface, \DOMNode
 *
 * @throws \InvalidArgumentException presumably raised by loadHtml() — confirm against its implementation
 */
public function __construct($element = null)
{
    $this->document = new \DOMDocument('1.0', $this->getEncoding());

    // every new parser starts with an empty broken-html replacement table
    self::$domBrokenReplaceHelper = [];

    // DOMDocument settings
    $this->document->preserveWhiteSpace = true;
    $this->document->formatOutput = true;

    $source = $element instanceof SimpleHtmlDomInterface ? $element->getNode() : $element;

    if ($source instanceof \DOMNode) {
        // deep copy, so the node belongs to this parser's document
        $imported = $this->document->importNode($source, true);
        if ($imported instanceof \DOMNode) {
            /** @noinspection UnusedFunctionResultInspection */
            $this->document->appendChild($imported);
        }
    } elseif ($source !== null) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->loadHtml($source);
    }
}
/**
 * Dispatch a call to an inaccessible method through the alias table
 * (case-insensitive on the method name).
 *
 * @param string $name      method name as written by the caller
 * @param array  $arguments arguments forwarded unchanged to the aliased method
 *
 * @throws \BadMethodCallException when the lower-cased name is not a known alias
 *
 * @return bool|mixed
 */
public function __call($name, $arguments)
{
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
        throw new \BadMethodCallException('Method does not exist: ' . $alias);
    }

    $target = [$this, self::$functionAliases[$alias]];

    return \call_user_func_array($target, $arguments);
}
/**
 * Static entry points kept for "simple_html_dom" compatibility.
 *
 * Supports exactly two names (matched case-sensitively):
 * "str_get_html" creates a parser and calls loadHtml(), and
 * "file_get_html" creates a parser and calls loadHtmlFile().
 * The first argument defaults to an empty string and the second to null when
 * they are not supplied.
 *
 * @param string $name
 * @param array  $arguments
 *
 * @throws \BadMethodCallException when $name is neither of the two supported names
 * @throws \RuntimeException
 * @throws \InvalidArgumentException
 *
 * @return HtmlDomParser
 */
public static function __callStatic($name, $arguments)
{
    $arguments0 = $arguments[0] ?? '';
    $arguments1 = $arguments[1] ?? null;

    if ($name === 'str_get_html') {
        $parser = new static();

        return $parser->loadHtml($arguments0, $arguments1);
    }

    if ($name === 'file_get_html') {
        $parser = new static();

        return $parser->loadHtmlFile($arguments0, $arguments1);
    }

    // Name the offending method, as __call() does, so the failure is diagnosable.
    throw new \BadMethodCallException('Method does not exist: ' . $name);
}
/** @noinspection MagicMethodsValidityInspection */
/**
 * Read a virtual content property of the whole document.
 *
 * The name is matched case-insensitively: "outerhtml"/"outertext" give html(),
 * "innerhtml"/"innertext" give innerHtml(), "text"/"plaintext" give text().
 * Every other name yields null.
 *
 * @param string $name
 *
 * @return string|null
 */
public function __get($name)
{
    $property = \strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
        return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
        return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
        return $this->text();
    }

    return null;
}
/**
 * Calling the parser like a function is a shortcut for find().
 *
 * @param string $selector
 * @param int    $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function __invoke($selector, $idx = null)
{
    $matches = $this->find($selector, $idx);

    return $matches;
}
/**
 * String conversion yields the result of html().
 *
 * @return string
 */
public function __toString()
{
    $markup = $this->html();

    return $markup;
}
/**
 * No-op kept only so code written against the old API keeps working.
 *
 * @deprecated
 *
 * @return bool always true
 */
public function clear(): bool
{
    $cleared = true;

    return $cleared;
}
/**
 * Swap characters listed in the replace-helper tables for placeholder tokens.
 *
 * When the input contains "http", URLs are collected first and the link-helper
 * characters ("[", "]", "{", "}") inside each URL are masked. Afterwards the
 * characters of the general helper table ("&", "|", "+", "%", "@") are masked
 * throughout the whole string.
 *
 * @param string $html
 *
 * @return string
 */
public static function replaceToPreserveHtmlEntities(string $html): string
{
    $originalLinks = [];
    $maskedLinks = [];

    if (\strpos($html, 'http') !== false) {
        // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
        $urlPattern = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i';
        \preg_match_all($urlPattern, $html, $originalLinks);

        if (!empty($originalLinks[1])) {
            // keep only the captured URLs; str_replace() on an array subject preserves its keys
            $originalLinks = $originalLinks[1];
            $maskedLinks = \str_replace(
                self::$domLinkReplaceHelper['orig'],
                self::$domLinkReplaceHelper['tmp'],
                (array) $originalLinks
            );
        }
    }

    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    $maskedCount = \count($maskedLinks);
    if ($maskedCount > 0 && \count($originalLinks) === $maskedCount) {
        // link replacements go first so whole URLs are swapped before single characters
        $search = \array_merge($originalLinks, $search);
        $replace = \array_merge($maskedLinks, $replace);
    }

    return \str_replace($search, $replace, $html);
}
/**
* @param string $html
*
* @return string
*/
public static function putReplacedBackToPreserveHtmlEntities(string $html): string
{
// Reverses the placeholder substitution: every "tmp" token found in $html is
// replaced by its "orig" counterpart.
//
// The merged lookup tables are built on the first call and then reused via a
// function-static variable.
static $DOM_REPLACE__HELPER_CACHE = null;
if ($DOM_REPLACE__HELPER_CACHE === null) {
// Link placeholders first, then the general placeholders.
$DOM_REPLACE__HELPER_CACHE['tmp'] = \array_merge(
self::$domLinkReplaceHelper['tmp'],
self::$domReplaceHelper['tmp']
);
$DOM_REPLACE__HELPER_CACHE['orig'] = \array_merge(
self::$domLinkReplaceHelper['orig'],
self::$domReplaceHelper['orig']
);
// NOTE: str_replace() pairs search/replace arrays by position, so the extra
// entries below are appended to 'tmp' and 'orig' in the same order.
// The artificial wrapper element is removed completely (replaced by '').
$DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__start'] = '<' . self::$domHtmlWrapperHelper . '>';
$DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__end'] = '</' . self::$domHtmlWrapperHelper . '>';
$DOM_REPLACE__HELPER_CACHE['orig']['html_wrapper__start'] = '';
$DOM_REPLACE__HELPER_CACHE['orig']['html_wrapper__end'] = '';
// The special-script helper element is turned back into a real <script> element.
$DOM_REPLACE__HELPER_CACHE['tmp']['html_special_script__start'] = '<' . self::$domHtmlSpecialScriptHelper;
$DOM_REPLACE__HELPER_CACHE['tmp']['html_special_script__end'] = '</' . self::$domHtmlSpecialScriptHelper . '>';
$DOM_REPLACE__HELPER_CACHE['orig']['html_special_script__start'] = '<script';
$DOM_REPLACE__HELPER_CACHE['orig']['html_special_script__end'] = '</script>';
}
// Placeholders registered for broken HTML fragments are restored before the
// cached tables are applied.
if (
isset(self::$domBrokenReplaceHelper['tmp'])
&&
\count(self::$domBrokenReplaceHelper['tmp']) > 0
) {
$html = \str_replace(self::$domBrokenReplaceHelper['tmp'], self::$domBrokenReplaceHelper['orig'], $html);
}
return \str_replace($DOM_REPLACE__HELPER_CACHE['tmp'], $DOM_REPLACE__HELPER_CACHE['orig'], $html);
}
/**
* Create DOMDocument from HTML.
*
* @param string $html
* @param int|null $libXMLExtraOptions
*
* @return \DOMDocument
*/
private function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
{
// Builds $this->document from the given markup and records, in several
// boolean flags, which structural parts the input was missing.
if ($this->keepBrokenHtml) {
$html = $this->keepBrokenHtml(\trim($html));
}
// No tag at all -> plain text; otherwise text before the first tag means
// the input is not wrapped in an element.
if (\strpos($html, '<') === false) {
$this->isDOMDocumentCreatedWithoutHtml = true;
} elseif (\strpos(\ltrim($html), '<') !== 0) {
$this->isDOMDocumentCreatedWithoutWrapper = true;
}
if (\strpos($html, '<html') === false) {
$this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
}
/** @noinspection HtmlRequiredTitleElement */
if (\strpos($html, '<head>') === false) {
$this->isDOMDocumentCreatedWithoutHeadWrapper = true;
}
// Input contains only the escaped form "<\/script>" and no real closing tag.
if (
\strpos($html, '</script>') === false
&&
\strpos($html, '<\/script>') !== false
) {
$this->isDOMDocumentCreatedWithFakeEndScript = true;
}
if (\strpos($html, '<script') !== false) {
$this->html5FallbackForScriptTags($html);
// Scripts typed as text/html get renamed to a helper element.
if (
\strpos($html, 'type="text/html"') !== false
||
\strpos($html, 'type=\'text/html\'') !== false
||
\strpos($html, 'type=text/html') !== false
) {
$this->keepSpecialScriptTags($html);
}
}
// set error level (previous values are remembered and restored at the end)
$internalErrors = \libxml_use_internal_errors(true);
$disableEntityLoader = \libxml_disable_entity_loader(true);
\libxml_clear_errors();
// Optional libxml flags are only added when the constant exists in this build.
$optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET;
if (\defined('LIBXML_BIGLINES')) {
$optionsXml |= \LIBXML_BIGLINES;
}
if (\defined('LIBXML_COMPACT')) {
$optionsXml |= \LIBXML_COMPACT;
}
if (\defined('LIBXML_HTML_NODEFDTD')) {
$optionsXml |= \LIBXML_HTML_NODEFDTD;
}
if ($libXMLExtraOptions !== null) {
$optionsXml |= $libXMLExtraOptions;
}
// Wrap the input in the helper element; the wrapper is stripped again in
// putReplacedBackToPreserveHtmlEntities().
if (
$this->isDOMDocumentCreatedWithoutWrapper
||
$this->keepBrokenHtml
) {
$html = '<' . self::$domHtmlWrapperHelper . '>' . $html . '</' . self::$domHtmlWrapperHelper . '>';
}
$html = self::replaceToPreserveHtmlEntities($html);
// First attempt: parse as XML; only accepted when libxml reported no errors.
$documentFound = false;
$sxe = \simplexml_load_string($html, \SimpleXMLElement::class, $optionsXml);
if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
$domElementTmp = \dom_import_simplexml($sxe);
if ($domElementTmp) {
$documentFound = true;
$this->document = $domElementTmp->ownerDocument;
}
}
// Fallback: parse with DOMDocument::loadHTML().
if ($documentFound === false) {
// UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
$xmlHackUsed = false;
// NOTE(review): stripos() takes (haystack, needle); here the literal '<?xml'
// is the haystack and $html the needle, which looks swapped. As written the
// condition is true for practically every input, so the encoding prefix is
// always added - confirm the intent before changing it.
if (\stripos('<?xml', $html) !== 0) {
$xmlHackUsed = true;
$html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
}
$this->document->loadHTML($html, $optionsXml);
// remove the "xml-encoding" hack (the first processing-instruction child)
if ($xmlHackUsed) {
foreach ($this->document->childNodes as $child) {
if ($child->nodeType === \XML_PI_NODE) {
/** @noinspection UnusedFunctionResultInspection */
$this->document->removeChild($child);
break;
}
}
}
}
// set encoding
$this->document->encoding = $this->getEncoding();
// restore lib-xml settings
\libxml_clear_errors();
\libxml_use_internal_errors($internalErrors);
\libxml_disable_entity_loader($disableEntityLoader);
return $this->document;
}
/**
* workaround for bug: https://bugs.php.net/bug.php?id=74628
*
* @param string $html
*/
protected function html5FallbackForScriptTags(string &$html)
{
    // Escapes every "</" inside <script> bodies as "<\/" and rewrites the
    // opening/closing tags in lower-case form. $html is modified in place.

    // regEx for e.g.: [<script id="elements-image-2">...<script>]
    /** @noinspection HtmlDeprecatedTag */
    $regExSpecialScript = '/<(script)(?<attr>[^>]*)>(?<content>.*)<\/\1>/isU';

    $escaped = \preg_replace_callback($regExSpecialScript, static function ($scripts) {
        return '<script' . $scripts['attr'] . '>' . \str_replace('</', '<\/', $scripts['content']) . '</script>';
    }, $html);

    // preg_replace_callback() returns null on a PCRE error (e.g. backtrack limit);
    // in that case keep the input untouched instead of replacing it with null.
    if ($escaped !== null) {
        $html = $escaped;
    }
}
/**
* @param string $html
*/
protected function keepSpecialScriptTags(string &$html)
{
    // Renames every <script type="text/html"> block to the helper element so
    // its markup is treated as regular content. $html is modified in place.
    $matches = [];
    // regEx for e.g.: [<script id="elements-image-1" type="text/html">...</script>]
    $pattern = '/<(script) [^>]*type=(["\']){0,1}text\/html\2{0,1}([^>]*)>.*<\/\1>/isU';
    \preg_match_all($pattern, $html, $matches);

    $helperTag = self::$domHtmlSpecialScriptHelper;
    $openTagLength = \strlen('<script');
    $closeTagLength = \strlen('</script>');

    foreach ($matches[0] ?? [] as $scriptBlock) {
        // Swap the opening tag name ...
        $renamed = '<' . $helperTag . \substr($scriptBlock, $openTagLength);
        // ... and the closing tag.
        $renamed = \substr($renamed, 0, -$closeTagLength) . '</' . $helperTag . '>';
        // remove the html5 fallback
        $renamed = \str_replace('<\/', '</', $renamed);

        $html = \str_replace($scriptBlock, $renamed, $html);
    }
}
/**
* @param string $html
*
* @return string
*/
protected function keepBrokenHtml(string $html): string
{
// Pass 1: repeatedly mask matching "<tag ...>value</tag>" pairs by replacing
// their angle brackets with marker strings, until nothing changes any more.
// NOTE(review): the (string) cast turns a null result of
// preg_replace_callback() (PCRE error) into '', silently dropping the input.
do {
$original = $html;
$html = (string) \preg_replace_callback(
'/(?<start>.*)<(?<element_start>[a-z]+)(?<element_start_addon> [^>]*)?>(?<value>.*?)<\/(?<element_end>\2)>(?<end>.*)/sui',
static function ($matches) {
return $matches['start'] .
'°lt_simple_html_dom__voku_°' . $matches['element_start'] . $matches['element_start_addon'] . '°gt_simple_html_dom__voku_°' .
$matches['value'] .
'°lt/_simple_html_dom__voku_°' . $matches['element_end'] . '°gt_simple_html_dom__voku_°' .
$matches['end'];
},
$html
);
} while ($original !== $html);
// Pass 2: closing tags that are still unmasked are treated as broken; each run
// is stored in self::$domBrokenReplaceHelper and replaced by a crc32-based
// placeholder, again until the string is stable.
// NOTE(review): entries are only appended to the static helper here; no reset
// is visible in this method.
do {
$original = $html;
$html = (string) \preg_replace_callback(
'/(?<start>[^<]*)?(?<broken>(?:(?:<\/\w+(?:\s+\w+=\\"[^\"]+\\")*+)(?:[^<]+)>)+)(?<end>.*)/u',
static function ($matches) {
// Undo any masking inside the broken fragment before storing it.
$matches['broken'] = \str_replace(
['°lt/_simple_html_dom__voku_°', '°lt_simple_html_dom__voku_°', '°gt_simple_html_dom__voku_°'],
['</', '<', '>'],
$matches['broken']
);
self::$domBrokenReplaceHelper['orig'][] = $matches['broken'];
self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = '____simple_html_dom__voku__broken_html____' . \crc32($matches['broken']);
return $matches['start'] . $matchesHash . $matches['end'];
},
$html
);
} while ($original !== $html);
// Finally turn the remaining markers back into real angle brackets.
return \str_replace(
['°lt/_simple_html_dom__voku_°', '°lt_simple_html_dom__voku_°', '°gt_simple_html_dom__voku_°'],
['</', '<', '>'],
$html
);
}
/**
* Return element by #id.
*
* @param string $id
*
* @return SimpleHtmlDomInterface
*/
public function getElementById(string $id): SimpleHtmlDomInterface
{
    // Build the "#id" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->findOne('#' . $id);
}
/**
* Returns elements by #id.
*
* @param string $id
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementsById(string $id, $idx = null)
{
    // Build the "#id" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->find('#' . $id, $idx);
}
/**
* Return elements by .class.
*
* @param string $class
*
* @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
{
    // Build the ".class" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->findMulti('.' . $class);
}
/**
* Return element by tag name.
*
* @param string $name
*
* @return SimpleHtmlDomInterface
*/
public function getElementByTagName(string $name): SimpleHtmlDomInterface
{
    // Only the first matching element of the document is of interest.
    $first = $this->document->getElementsByTagName($name)->item(0);

    // A blank element stands in when nothing matched.
    return $first === null
        ? new SimpleHtmlDomBlank()
        : new SimpleHtmlDom($first);
}
/**
* Returns elements by tag name.
*
* @param string $name
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementsByTagName(string $name, $idx = null)
{
    // Wrap every matching DOM node of the document.
    $nodesList = $this->document->getElementsByTagName($name);
    $elements = new SimpleHtmlDomNode();
    foreach ($nodesList as $node) {
        $elements[] = new SimpleHtmlDom($node);
    }

    // return all elements (or a blank list when nothing matched)
    if ($idx === null) {
        if (\count($elements) === 0) {
            return new SimpleHtmlDomNodeBlank();
        }

        return $elements;
    }

    // handle negative values: count from the end of the list
    if ($idx < 0) {
        $idx = \count($elements) + $idx;
    }

    // return one element; a miss yields a blank *element* (not a blank list),
    // matching what find() and SimpleHtmlDom::getElementsByTagName() return
    // for a single-index lookup.
    return $elements[$idx] ?? new SimpleHtmlDomBlank();
}
/**
* Find one node with a CSS selector.
*
* @param string $selector
*
* @return SimpleHtmlDomInterface
*/
public function findOne(string $selector): SimpleHtmlDomInterface
{
    // Index 0 asks find() for the first match only.
    $firstMatch = $this->find($selector, 0);

    return $firstMatch;
}
/**
* Find nodes with a CSS selector.
*
* @param string $selector
*
* @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function findMulti(string $selector): SimpleHtmlDomNodeInterface
{
    // A null index asks find() for the complete result list.
    $allMatches = $this->find($selector, null);

    return $allMatches;
}
/**
* Find list of nodes with a CSS selector.
*
* @param string $selector
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function find(string $selector, $idx = null)
{
    // Translate the selector to XPath and evaluate it against the document.
    $query = SelectorConverter::toXPath($selector);
    $matches = (new \DOMXPath($this->document))->query($query);

    $collection = new SimpleHtmlDomNode();
    foreach ($matches as $match) {
        $collection[] = new SimpleHtmlDom($match);
    }

    // No index: hand back the whole list, or a blank list when it is empty.
    if ($idx === null) {
        return \count($collection) === 0
            ? new SimpleHtmlDomNodeBlank()
            : $collection;
    }

    // A negative index counts from the end of the list.
    $position = $idx < 0 ? \count($collection) + $idx : $idx;

    // Single element, or a blank element when the position does not exist.
    return $collection[$position] ?? new SimpleHtmlDomBlank();
}
/**
* @param string $content
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function fixHtmlOutput(string $content, bool $multiDecodeNewHtmlEntity = false): string
{
// Post-processes serialized markup: removes the wrapper elements that were not
// part of the original input (according to the flags set while parsing),
// decodes entities / percent-encoding and restores the placeholders.
// INFO: DOMDocument will encapsulate plaintext into a e.g. paragraph tag (<p>),
// so we try to remove it here again ...
if ($this->isDOMDocumentCreatedWithoutHtmlWrapper) {
/** @noinspection HtmlRequiredLangAttribute */
$content = \str_replace(
[
'<body>',
'</body>',
'<html>',
'</html>',
],
'',
$content
);
}
if ($this->isDOMDocumentCreatedWithoutHeadWrapper) {
/** @noinspection HtmlRequiredTitleElement */
$content = \str_replace(
[
'<head>',
'</head>',
],
'',
$content
);
}
if ($this->isDOMDocumentCreatedWithFakeEndScript) {
$content = \str_replace(
'</script>',
'',
$content
);
}
if ($this->isDOMDocumentCreatedWithoutWrapper) {
// NOTE(review): the opening <p> is only removed at the very start of the
// string, but the second pattern is not anchored and removes every </p> in
// the content - confirm whether '/<\/p>$/' was intended.
$content = (string) \preg_replace('/^<p>/', '', $content);
$content = (string) \preg_replace('/<\/p>/', '', $content);
}
if ($this->isDOMDocumentCreatedWithoutHtml) {
$content = \str_replace(
[
'<p>',
'</p>',
'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
],
'',
$content
);
}
// Always applied: drop the internal <simpleHtmlDomP> helper element and
// collapse doubled <head> / <br></br> artefacts.
/** @noinspection CheckTagEmptyBody */
/** @noinspection HtmlExtraClosingTag */
/** @noinspection HtmlRequiredTitleElement */
$content = \trim(
\str_replace(
[
'<simpleHtmlDomP>',
'</simpleHtmlDomP>',
'<head><head>',
'</head></head>',
'<br></br>',
],
[
'',
'',
'<head>',
'</head>',
'<br>',
],
$content
)
);
if ($multiDecodeNewHtmlEntity) {
// Prefer the optional voku UTF8 helper when it is installed ...
if (\class_exists('\voku\helper\UTF8')) {
/** @noinspection PhpUndefinedClassInspection */
$content = UTF8::rawurldecode($content);
} else {
// ... otherwise decode repeatedly until the string no longer changes.
do {
$content_compare = $content;
$content = \rawurldecode(
\html_entity_decode(
$content,
\ENT_QUOTES | \ENT_HTML5
)
);
} while ($content_compare !== $content);
}
} else {
// Single decoding pass.
$content = \rawurldecode(
\html_entity_decode(
$content,
\ENT_QUOTES | \ENT_HTML5
)
);
}
return self::putReplacedBackToPreserveHtmlEntities($content);
}
/**
* @return \DOMDocument
*/
public function getDocument(): \DOMDocument
{
    // Exposes the underlying DOMDocument instance (not a copy).
    $document = $this->document;

    return $document;
}
/**
* Get the encoding to use.
*
* @return string
*/
private function getEncoding(): string
{
    // Encoding configured on this parser instance.
    $encoding = $this->encoding;

    return $encoding;
}
/**
* @return bool
*/
public function getIsDOMDocumentCreatedWithoutHtml(): bool
{
    // Flag set while parsing when the input contained no '<' at all.
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
}
/**
* @return bool
*/
public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool
{
    // Flag set while parsing when the input contained no '<html'.
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
}
/**
* @return bool
*/
public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool
{
    // Flag set while parsing when the input contained no '<head>'.
    $flag = $this->isDOMDocumentCreatedWithoutHeadWrapper;

    return $flag;
}
/**
* @return bool
*/
public function getIsDOMDocumentCreatedWithoutWrapper(): bool
{
    // Flag set while parsing when the input did not start with a tag.
    $flag = $this->isDOMDocumentCreatedWithoutWrapper;

    return $flag;
}
/**
* Get dom node's outer html.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function html(bool $multiDecodeNewHtmlEntity = false): string
{
    // Give a registered callback the chance to act before serializing.
    if ($this::$callback !== null) {
        \call_user_func($this::$callback, [$this]);
    }

    // Without an <html> wrapper in the input only the root element is serialized.
    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    // saveHTML() signals failure with false.
    return $content === false
        ? ''
        : $this->fixHtmlOutput($content, $multiDecodeNewHtmlEntity);
}
/**
* @param bool $keepBrokenHtml
*
* @return HtmlDomParser
*/
public function useKeepBrokenHtml(bool $keepBrokenHtml): self
{
    // Stores the switch that createDOMDocument() consults; returns $this for chaining.
    $this->keepBrokenHtml = $keepBrokenHtml;

    return $this;
}
/**
* Get the HTML as XML.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function xml(bool $multiDecodeNewHtmlEntity = false): string
{
    // Serialize as XML, keeping empty elements as explicit start/end tag pairs.
    $serialized = $this->document->saveXML(null, \LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = (string) \preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(\ltrim($withoutHeader), $multiDecodeNewHtmlEntity);
}
/**
* Get dom node's inner html.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
{
    // Serialize each child of the root element and join the pieces.
    $pieces = [];
    foreach ($this->document->documentElement->childNodes as $child) {
        $pieces[] = $this->document->saveHTML($child);
    }

    return $this->fixHtmlOutput(\implode('', $pieces), $multiDecodeNewHtmlEntity);
}
/**
* Load HTML from string.
*
* @param string $html
* @param int|null $libXMLExtraOptions
*
* @throws \InvalidArgumentException if argument is not string
*
* @return HtmlDomParser
*/
public function loadHtml(string $html, $libXMLExtraOptions = null): self
{
    // Parse the markup and keep the resulting document on this instance.
    $document = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $document;

    return $this;
}
/**
* Load HTML from file.
*
* @param string $filePath
* @param int|null $libXMLExtraOptions
*
* @throws \RuntimeException
* @throws \InvalidArgumentException
*
* @return HtmlDomParser
*/
public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): self
{
    // http(s) URLs are passed on as-is; anything else must exist locally.
    if (
        !\preg_match("/^https?:\/\//i", $filePath)
        &&
        !\file_exists($filePath)
    ) {
        throw new \RuntimeException('File ' . $filePath . ' not found');
    }

    try {
        // Prefer the optional voku UTF8 helper when it is installed.
        if (\class_exists('\voku\helper\UTF8')) {
            /** @noinspection PhpUndefinedClassInspection */
            $html = UTF8::file_get_contents($filePath);
        } else {
            $html = \file_get_contents($filePath);
        }
    } catch (\Exception $e) {
        // Keep the original exception as "previous" so the root cause is not lost.
        throw new \RuntimeException('Could not load file ' . $filePath, 0, $e);
    }

    // file_get_contents() signals failure with false.
    if ($html === false) {
        throw new \RuntimeException('Could not load file ' . $filePath);
    }

    return $this->loadHtml($html, $libXMLExtraOptions);
}
/**
* Save the html-dom as string.
*
* @param string $filepath
*
* @return string
*/
public function save(string $filepath = ''): string
{
    $markup = $this->innerHtml();

    // Without a target path the markup is only returned.
    if ($filepath === '') {
        return $markup;
    }

    // Write under an exclusive lock; the write result is not inspected.
    \file_put_contents($filepath, $markup, \LOCK_EX);

    return $markup;
}
/**
* @param callable $functionName
*/
public function set_callback($functionName)
{
    // Stored in a static property, so the callback is shared by all instances
    // of the (late-static-bound) class; html() invokes it before serializing.
    static::$callback = $functionName;
}
/**
* Get dom node's plain text.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function text(bool $multiDecodeNewHtmlEntity = false): string
{
    // Text content of the whole document, run through the usual output clean-up.
    $plain = $this->document->textContent;

    return $this->fixHtmlOutput($plain, $multiDecodeNewHtmlEntity);
}
public function __clone()
{
    // Give the cloned parser its own DOMDocument instead of sharing the original.
    $copy = clone $this->document;
    $this->document = $copy;
}
}
<?php
declare(strict_types=1);
namespace voku\helper;
use Symfony\Component\CssSelector\CssSelectorConverter;
class SelectorConverter
{
    /**
     * Cache of already converted selectors (selector => XPath query).
     *
     * @var array
     */
    protected static $compiled = [];

    /**
     * Convert a CSS selector into an XPath query.
     *
     * The pseudo selectors "text" and "comment" and anything that already
     * starts with "//" are handled without the Symfony converter.
     *
     * @param string $selector
     *
     * @throws \RuntimeException if the Symfony CssSelector component is missing
     *
     * @return mixed|string
     */
    public static function toXPath(string $selector)
    {
        // Previously converted selectors are served from the cache.
        if (isset(self::$compiled[$selector])) {
            return self::$compiled[$selector];
        }

        // Select DOMText / DOMComment nodes.
        $nodeTypeQueries = [
            'text'    => '//text()',
            'comment' => '//comment()',
        ];
        if (isset($nodeTypeQueries[$selector])) {
            return $nodeTypeQueries[$selector];
        }

        // Already an XPath expression: pass it through untouched.
        if (\strncmp($selector, '//', 2) === 0) {
            return $selector;
        }

        if (!\class_exists(CssSelectorConverter::class)) {
            throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector 2.8+ is not installed (you can use filterXPath instead).');
        }

        // Convert once and remember the result for later calls.
        $xPathQuery = (new CssSelectorConverter(true))->toXPath($selector);
        self::$compiled[$selector] = $xPathQuery;

        return $xPathQuery;
    }
}
<?php
declare(strict_types=1);
namespace voku\helper;
/** @noinspection PhpHierarchyChecksInspection */
class SimpleHtmlDom extends AbstractSimpleHtmlDom implements \IteratorAggregate, SimpleHtmlDomInterface
{
/**
* @var \DOMElement|\DOMNode
*/
protected $node;
/**
* @param \DOMElement|\DOMNode $node
*/
public function __construct(\DOMNode $node)
{
    // The wrapper keeps a reference to the given DOM node; no copy is made.
    $this->node = $node;
}
/**
* @param string $name
* @param array $arguments
*
* @throws \BadMethodCallException
*
* @return SimpleHtmlDomInterface|string|null
*/
public function __call($name, $arguments)
{
    // The alias table is looked up with the lower-cased method name.
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
        throw new \BadMethodCallException('Method does not exist');
    }

    // Forward the call to the method the alias maps to.
    return \call_user_func_array([$this, self::$functionAliases[$alias]], $arguments);
}
/**
* Returns children of node.
*
* @param int $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null
*/
public function childNodes(int $idx = -1)
{
    $children = $this->getIterator();

    // Any index other than the -1 sentinel selects one child (null when absent).
    if ($idx !== -1) {
        return $children[$idx] ?? null;
    }

    // -1 means "all children".
    return $children;
}
/**
* Find list of nodes with a CSS selector.
*
* @param string $selector
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function find(string $selector, $idx = null)
{
    // The search runs on a parser created from this element.
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
}
/**
* Find one node with a CSS selector.
*
* @param string $selector
*
* @return SimpleHtmlDomInterface
*/
public function findOne(string $selector): SimpleHtmlDomInterface
{
    // Index 0 asks find() for the first match only.
    $firstMatch = $this->find($selector, 0);

    return $firstMatch;
}
/**
* Find nodes with a CSS selector.
*
* @param string $selector
*
* @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function findMulti(string $selector): SimpleHtmlDomNodeInterface
{
    // A null index asks find() for the complete result list.
    $allMatches = $this->find($selector, null);

    return $allMatches;
}
/**
* Returns the first child of node.
*
* @return SimpleHtmlDomInterface|null
*/
public function firstChild()
{
    /** @var \DOMNode|null $first */
    $first = $this->node->firstChild;

    // null when the node has no children, otherwise the wrapped first child.
    return $first === null ? null : new static($first);
}
/**
* Returns an array of attributes.
*
* @return array|null
*/
public function getAllAttributes()
{
    // Nodes without attributes yield null rather than an empty array.
    if (!$this->node->hasAttributes()) {
        return null;
    }

    $result = [];
    foreach ($this->node->attributes as $attribute) {
        // Attribute values may still contain internal placeholders; restore them.
        $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
}
/**
* Return attribute value.
*
* @param string $name
*
* @return string
*/
public function getAttribute(string $name): string
{
    // Only element nodes carry attributes.
    if (!$this->node instanceof \DOMElement) {
        return '';
    }

    $rawValue = $this->node->getAttribute($name);

    // Restore internal placeholders before handing the value out.
    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($rawValue);
}
/**
* Return element by #id.
*
* @param string $id
*
* @return SimpleHtmlDomInterface
*/
public function getElementById(string $id): SimpleHtmlDomInterface
{
    // Build the "#id" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->findOne('#' . $id);
}
/**
* Returns elements by #id.
*
* @param string $id
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementsById(string $id, $idx = null)
{
    // Build the "#id" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->find('#' . $id, $idx);
}
/**
* Return elements by .class.
*
* @param string $class
*
* @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
{
    // Build the ".class" selector by concatenation; the "${var}" interpolation
    // form is deprecated as of PHP 8.2.
    return $this->findMulti('.' . $class);
}
/**
* Return element by tag name.
*
* @param string $name
*
* @return SimpleHtmlDomInterface
*/
public function getElementByTagName(string $name): SimpleHtmlDomInterface
{
    // Only element nodes can be searched for descendants by tag name.
    $first = $this->node instanceof \DOMElement
        ? $this->node->getElementsByTagName($name)->item(0)
        : null;

    // A blank element stands in when nothing matched.
    return $first === null
        ? new SimpleHtmlDomBlank()
        : new static($first);
}
/**
* Returns elements by tag name.
*
* @param string $name
* @param int|null $idx
*
* @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
*/
public function getElementsByTagName(string $name, $idx = null)
{
    // Only element nodes can be searched; other node types yield no matches.
    $matches = $this->node instanceof \DOMElement
        ? $this->node->getElementsByTagName($name)
        : [];

    $collection = new SimpleHtmlDomNode();
    foreach ($matches as $match) {
        $collection[] = new static($match);
    }

    // No index: hand back the whole list, or a blank list when it is empty.
    if ($idx === null) {
        return \count($collection) === 0
            ? new SimpleHtmlDomNodeBlank()
            : $collection;
    }

    // A negative index counts from the end of the list.
    $position = $idx < 0 ? \count($collection) + $idx : $idx;

    // Single element, or a blank element when the position does not exist.
    return $collection[$position] ?? new SimpleHtmlDomBlank();
}
/**
* Create a new "HtmlDomParser"-object from the current context.
*
* @return HtmlDomParser
*/
public function getHtmlDomParser(): HtmlDomParser
{
    // A fresh parser is created on every call, seeded with this element.
    $parser = new HtmlDomParser($this);

    return $parser;
}
/**
* Retrieve an external iterator.
*
* @see http://php.net/manual/en/iteratoraggregate.getiterator.php
*
* @return SimpleHtmlDomNode
* <p>
* An instance of an object implementing <b>Iterator</b> or
* <b>Traversable</b>
* </p>
*/
public function getIterator(): SimpleHtmlDomNodeInterface
{
    $children = new SimpleHtmlDomNode();

    // Childless nodes produce an empty (but real) node list.
    if (!$this->node->hasChildNodes()) {
        return $children;
    }

    foreach ($this->node->childNodes as $child) {
        $children[] = new static($child);
    }

    return $children;
}
/**
* @return \DOMNode
*/
public function getNode(): \DOMNode
{
    // Exposes the wrapped DOM node itself (not a copy).
    $node = $this->node;

    return $node;
}
/**
* Determine if an attribute exists on the element.
*
* @param string $name
*
* @return bool
*/
public function hasAttribute(string $name): bool
{
    // Non-element nodes never have attributes; elements are asked directly.
    return $this->node instanceof \DOMElement
        && $this->node->hasAttribute($name);
}
/**
* Get dom node's outer html.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function html(bool $multiDecodeNewHtmlEntity = false): string
{
    // Serialization is delegated to a parser created from this element.
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
}
/**
* Get dom node's inner html.
*
* @param bool $multiDecodeNewHtmlEntity
*
* @return string
*/
public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
{
    // Serialization is delegated to a parser created from this element.
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
}
/**
* Returns the last child of node.
*
* @return SimpleHtmlDomInterface|null
*/
public function lastChild()
{
    /** @var \DOMNode|null $last */
    $last = $this->node->lastChild;

    // null when the node has no children, otherwise the wrapped last child.
    return $last === null ? null : new static($last);
}
/**
* Returns the next sibling of node.
*
* @return SimpleHtmlDomInterface|null
*/
public function nextSibling()
{
    /** @var \DOMNode|null $next */
    $next = $this->node->nextSibling;

    // null when there is no following sibling, otherwise the wrapped sibling.
    return $next === null ? null : new static($next);
}
/**
* Returns the parent of node.
*
* @return SimpleHtmlDomInterface
*/
public function parentNode(): SimpleHtmlDomInterface
{
    /** @var \DOMNode|null $parent */
    $parent = $this->node->parentNode;

    // A detached node (or the document itself) has no parent. The constructor
    // only accepts a \DOMNode, so passing null on would raise a TypeError;
    // return a blank element instead, as the tag-name lookups do on a miss.
    if ($parent === null) {
        return new SimpleHtmlDomBlank();
    }

    return new static($parent);
}
/**
* Nodes can get partially destroyed in which they're still an
* actual DOM node (such as \DOMElement) but almost their entire
* body is gone, including the `nodeType` attribute.
*
* @return bool true if node has been destroyed
*/
public function isRemoved(): bool
{
    // A node that no longer exposes its nodeType is treated as removed.
    $hasNodeType = isset($this->node->nodeType);

    return !$hasNodeType;
}
/**
* Returns the previous sibling of node.
*
* @return SimpleHtmlDomInterface|null
*/
public function previousSibling()
{
    /** @var \DOMNode|null $previous */
    $previous = $this->node->previousSibling;

    // null when there is no preceding sibling, otherwise the wrapped sibling.
    return $previous === null ? null : new static($previous);
}
/**
* Replace child node.
*
* @param string $string
*
* @return SimpleHtmlDomInterface
*/
protected function replaceChildWithString(string $string): SimpleHtmlDomInterface
{
    // Replaces all children of the wrapped node with the parsed $string.
    // An empty $string simply removes the children.
    $newDocument = null;

    if (!empty($string)) {
        $newDocument = new HtmlDomParser($string);

        // The parsed fragment must serialize back to (a normalized form of)
        // the input, otherwise the input is rejected as invalid HTML.
        $tmpDomString = $this->normalizeStringForComparision($newDocument);
        $tmpStr = $this->normalizeStringForComparision($string);
        if ($tmpDomString !== $tmpStr) {
            throw new \RuntimeException(
                'Not valid HTML fragment!' . "\n" .
                $tmpDomString . "\n" .
                $tmpStr
            );
        }
    }

    // Remove every existing child. childNodes is a live list, so removing
    // entries while iterating it with foreach skips nodes; draining from the
    // front guarantees that all children are removed.
    while ($this->node->firstChild !== null) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
        $newDocument = $this->cleanHtmlWrapper($newDocument);
        $ownerDocument = $this->node->ownerDocument;
        if ($ownerDocument !== null) {
            // Copy the parsed root element into this node's document and attach it.
            $newNode = $ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
            /** @noinspection UnusedFunctionResultInspection */
            $this->node->appendChild($newNode);
        }
    }

    return $this;
}
/**
* Replace this node with text
*
* @param string $string
*
* @return SimpleHtmlDomInterface
*/
protected function replaceTextWithString($string): SimpleHtmlDomInterface
{
    // An empty replacement removes the node from its parent.
    if (empty($string)) {
        $this->node->parentNode->removeChild($this->node);

        return $this;
    }

    $document = $this->node->ownerDocument;
    if ($document === null) {
        // Without an owner document no text node can be created.
        return $this;
    }

    // Create the text node, swap it in and keep wrapping the new node.
    $replacement = $document->importNode($document->createTextNode($string), true);
    $this->node->parentNode->replaceChild($replacement, $this->node);
    $this->node = $replacement;

    return $this;
}
/**
* Replace this node.
*
* @param string $string
*
* @return SimpleHtmlDomInterface
*/
protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface
{
// Replaces the wrapped node itself with the parsed $string; an empty $string
// removes the node from its parent.
// NOTE(review): parentNode is dereferenced without a null check here and
// below - presumably callers only use this on attached nodes; confirm.
if (empty($string)) {
$this->node->parentNode->removeChild($this->node);
return $this;
}
$newDocument = new HtmlDomParser($string);
// The parsed fragment must serialize back to (a normalized form of) the
// input, otherwise the input is rejected.
$tmpDomOuterTextString = $this->normalizeStringForComparision($newDocument);
$tmpStr = $this->normalizeStringForComparision($string);
if ($tmpDomOuterTextString !== $tmpStr) {
throw new \RuntimeException(
'Not valid HTML fragment!' . "\n"
. $tmpDomOuterTextString . "\n" .
$tmpStr
);
}
$newDocument = $this->cleanHtmlWrapper($newDocument, true);
$ownerDocument = $this->node->ownerDocument;
if ($ownerDocument === null) {
return $this;
}
// Copy the parsed root element into this node's document, swap it in and
// keep wrapping the new node.
$newNode = $ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
$this->node->parentNode->replaceChild($newNode, $this->node);
$this->node = $newNode;
// Remove head element, preserving child nodes. (again)
if (
$this->node->parentNode instanceof \DOMElement
&&
$newDocument->getIsDOMDocumentCreatedWithoutHeadWrapper()
) {
// First <head> below the parent (null when there is none).
$html = $this->node->parentNode->getElementsByTagName('head')[0];
if ($this->node->parentNode->ownerDocument !== null) {
$fragment = $this->node->parentNode->ownerDocument->createDocumentFragment();
if ($html !== null) {
/** @var \DOMNode $html */
// Move the children into the fragment one by one; appendChild() detaches
// each child from <head>, so the loop ends once <head> is empty.
while ($html->childNodes->length > 0) {
$tmpNode = $html->childNodes->item(0);
if ($tmpNode !== null) {
/** @noinspection UnusedFunctionResultInspection */
$fragment->appendChild($tmpNode);
}
}
// Replace <head> by its former children.
/** @noinspection UnusedFunctionResultInspection */
$html->parentNode->replaceChild($fragment, $html);
}
}
}
return $this;
}
/**
* Normalize the given input for comparision.
*
* @param HtmlDomParser|string $input
*
* @return string
*/
private function normalizeStringForComparision($input): string
{
    // Parser input is compared via its outer text, anything else via its string form.
    if ($input instanceof HtmlDomParser) {
        $raw = $input->outerText();

        // A <head> that was not part of the original input must not count.
        if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
            /** @noinspection HtmlRequiredTitleElement */
            $raw = \str_replace(['<head>', '</head>'], '', $raw);
        }
    } else {
        $raw = (string) $input;
    }

    // Lower-case, drop spaces and line breaks, and treat "/>" like ">".
    $stripped = \str_replace(
        [' ', "\n", "\r", '/>'],
        ['', '', '', '>'],
        \strtolower($raw)
    );

    // Decode then re-encode so differently escaped inputs compare equal.
    return \urlencode(\urldecode(\trim($stripped)));
}
/**
* @param HtmlDomParser $newDocument
* @param bool $removeExtraHeadTag
*
* @return HtmlDomParser
*/
protected function cleanHtmlWrapper(HtmlDomParser $newDocument, $removeExtraHeadTag = false): HtmlDomParser
{
// Strips the structural elements (doctype, <html>, <body>, optionally <head>)
// that were not part of the parsed input, keeping their children in place.
// The passed parser's document is modified and the same parser is returned.
if (
$newDocument->getIsDOMDocumentCreatedWithoutHtml()
||
$newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper()
) {
// Remove doc-type node.
if ($newDocument->getDocument()->doctype !== null) {
/** @noinspection UnusedFunctionResultInspection */
$newDocument->getDocument()->doctype->parentNode->removeChild($newDocument->getDocument()->doctype);
}
// Remove html element, preserving child nodes.
$html = $newDocument->getDocument()->getElementsByTagName('html')->item(0);
$fragment = $newDocument->getDocument()->createDocumentFragment();
if ($html !== null) {
// appendChild() moves the child out of <html>, so the loop drains it.
while ($html->childNodes->length > 0) {
$tmpNode = $html->childNodes->item(0);
if ($tmpNode !== null) {
/** @noinspection UnusedFunctionResultInspection */
$fragment->appendChild($tmpNode);
}
}
/** @noinspection UnusedFunctionResultInspection */
$html->parentNode->replaceChild($fragment, $html);
}
// Remove body element, preserving child nodes.
$body = $newDocument->getDocument()->getElementsByTagName('body')->item(0);
$fragment = $newDocument->getDocument()->createDocumentFragment();
if ($body instanceof \DOMElement) {
while ($body->childNodes->length > 0) {
$tmpNode = $body->childNodes->item(0);
if ($tmpNode !== null) {
/** @noinspection UnusedFunctionResultInspection */
$fragment->appendChild($tmpNode);
}
}
/** @noinspection UnusedFunctionResultInspection */
$body->parentNode->replaceChild($fragment, $body);
// At this point DOMDocument still added a "<p>"-wrapper around our string,
// so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
// NOTE(review): this renames the first <p> found anywhere in the document,
// which is presumably the generated wrapper - confirm for input that
// contains its own <p> elements.
$item = $newDocument->getDocument()->getElementsByTagName('p')->item(0);
if ($item !== null) {
/** @noinspection UnusedFunctionResultInspection */
$this->changeElementName($item, 'simpleHtmlDomP');
}
}
}
// Remove head element, preserving child nodes.
// Note: this part works on the parent of the wrapped node ($this->node), not
// on $newDocument.
if (
$removeExtraHeadTag
&&
$this->node->parentNode instanceof \DOMElement
&&
$newDocument->getIsDOMDocumentCreatedWithoutHeadWrapper()
) {
// First <head> below the parent (null when there is none).
$html = $this->node->parentNode->getElementsByTagName('head')[0];
if ($this->node->parentNode->ownerDocument !== null) {
$fragment = $this->node->parentNode->ownerDocument->createDocumentFragment();
if ($html !== null) {
/** @var \DOMNode $html */
while ($html->childNodes->length > 0) {
$tmpNode = $html->childNodes->item(0);
if ($tmpNode !== null) {
/** @noinspection UnusedFunctionResultInspection */
$fragment->appendChild($tmpNode);
}
}
/** @noinspection UnusedFunctionResultInspection */
$html->parentNode->replaceChild($fragment, $html);
}
}
}
return $newDocument;
}
/**
 * Change the name of a tag in a "DOMNode".
 *
 * <p>A new element named $name is created in the owner document of $node,
 * the children and attributes of $node are copied onto it and $node is then
 * replaced by the new element inside its parent.</p>
 *
 * @param \DOMNode $node <p>The node that should be renamed.</p>
 * @param string   $name <p>The new tag name.</p>
 *
 * @return \DOMElement|false
 *                           <p>The newly created DOMElement, or false if $node
 *                           does not belong to a document.</p>
 */
protected function changeElementName(\DOMNode $node, string $name)
{
    $ownerDocument = $node->ownerDocument;
    if (!$ownerDocument) {
        return false;
    }

    $newNode = $ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
        $child = $ownerDocument->importNode($child, true);
        /** @noinspection UnusedFunctionResultInspection */
        $newNode->appendChild($child);
    }

    // "attributes" is null for every node type except elements.
    foreach ($node->attributes ?? [] as $attrNode) {
        // DOMElement::setAttribute() needs the string value, not the DOMAttr node itself.
        /** @noinspection UnusedFunctionResultInspection */
        $newNode->setAttribute($attrNode->nodeName, (string) $attrNode->nodeValue);
    }

    // Replace through the real parent: asking the document to do it only
    // works while $node is a direct child of the document.
    if ($node->parentNode !== null) {
        /** @noinspection UnusedFunctionResultInspection */
        $node->parentNode->replaceChild($newNode, $node);
    }

    return $newNode;
}
/**
 * Set attribute value.
 *
 * <p>The attribute is removed instead of set when $value counts as "no value":
 * only NULL in strict mode; any empty value except the string "0" otherwise.</p>
 *
 * @param string      $name   <p>The name of the html-attribute.</p>
 * @param string|null $value  <p>Set to NULL or empty string, to remove the attribute.</p>
 * @param bool        $strict <p>
 *                            $value must be NULL, to remove the attribute,
 *                            so that you can set an empty string as attribute-value e.g. autofocus=""
 *                            </p>
 *
 * @return SimpleHtmlDomInterface
 */
public function setAttribute(string $name, $value = null, bool $strict = false): SimpleHtmlDomInterface
{
    if ($strict) {
        $remove = $value === null;
    } else {
        // empty() alone also matches the string "0", which is a perfectly
        // valid attribute value (e.g. value="0") and must not be dropped.
        $remove = empty($value) && $value !== '0';
    }

    if ($remove) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->removeAttribute($name);
    } elseif (\method_exists($this->node, 'setAttribute')) {
        // Only element nodes offer setAttribute(); other node types are left untouched.
        /** @noinspection UnusedFunctionResultInspection */
        $this->node->setAttribute($name, $value);
    }

    return $this;
}
/**
 * Get or set the value of a form element.
 *
 * <p>Getter ($value === null): returns the "value" attribute of text inputs
 * and of checked checkboxes / radio buttons, the values of all selected
 * options of a select, or the node value of a textarea.</p>
 * <p>Setter: checks / unchecks a checkbox or radio button depending on whether
 * $value equals its "value" attribute, selects the matching option(s) of a
 * select, or writes $value into an input / textarea.</p>
 *
 * @param string|string[]|null $value <p>
 *                                    null === get the current input value
 *                                    text === set a new input value
 *                                    list of texts === select several options of a select
 *                                    </p>
 *
 * @return string|string[]|null
 *                              <p>The current value in getter mode (null if there is none),
 *                              always null in setter mode.</p>
 */
public function val($value = null)
{
    if ($value === null) {
        if (
            $this->tag === 'input'
            &&
            (
                $this->getAttribute('type') === 'text'
                ||
                !$this->hasAttribute('type')
            )
        ) {
            return $this->getAttribute('value');
        }

        if (
            $this->hasAttribute('checked')
            &&
            \in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)
        ) {
            return $this->getAttribute('value');
        }

        if ($this->node->nodeName === 'select') {
            $valuesFromDom = [];
            $options = $this->getElementsByTagName('option');
            if ($options instanceof SimpleHtmlDomNode) {
                foreach ($options as $option) {
                    // The setter marks options with "selected", so the state has to be
                    // read from each option and not from a "checked" flag on the select.
                    if ($option->hasAttribute('selected')) {
                        /** @noinspection UnnecessaryCastingInspection */
                        $valuesFromDom[] = (string) $option->getAttribute('value');
                    }
                }
            }

            if (\count($valuesFromDom) === 0) {
                return null;
            }

            return $valuesFromDom;
        }

        if ($this->node->nodeName === 'textarea') {
            return $this->node->nodeValue;
        }
    } else {
        /** @noinspection NestedPositiveIfStatementsInspection */
        if (\in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)) {
            if ($value === $this->getAttribute('value')) {
                /** @noinspection UnusedFunctionResultInspection */
                $this->setAttribute('checked', 'checked');
            } else {
                /** @noinspection UnusedFunctionResultInspection */
                $this->removeAttribute('checked');
            }
        } elseif ($this->node instanceof \DOMElement && $this->node->nodeName === 'select') {
            // Accept a single value as well as the list the getter returns.
            $wantedValues = \is_array($value) ? $value : [$value];
            foreach ($this->node->getElementsByTagName('option') as $option) {
                /** @var \DOMElement $option */
                if (\in_array($option->getAttribute('value'), $wantedValues, true)) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $option->setAttribute('selected', 'selected');
                } else {
                    $option->removeAttribute('selected');
                }
            }
        } elseif ($this->node->nodeName === 'input' && \is_string($value)) {
            // Set value for input elements
            /** @noinspection UnusedFunctionResultInspection */
            $this->setAttribute('value', $value);
        } elseif ($this->node->nodeName === 'textarea' && \is_string($value)) {
            $this->node->nodeValue = $value;
        }
    }

    return null;
}
/**
 * Drop an attribute from the wrapped node.
 *
 * <p>Nodes that do not offer a "removeAttribute" method are left unchanged.</p>
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return SimpleHtmlDomInterface
 *                                <p>The current instance, to allow chaining.</p>
 */
public function removeAttribute(string $name): SimpleHtmlDomInterface
{
    if (!\method_exists($this->node, 'removeAttribute')) {
        return $this;
    }

    $this->node->removeAttribute($name);

    return $this;
}
/**
 * Return the plain text of the wrapped dom node.
 *
 * <p>The node's "textContent" is passed through the parser's "fixHtmlOutput()".</p>
 *
 * @return string
 */
public function text(): string
{
    $parser = $this->getHtmlDomParser();
    $rawText = $this->node->textContent;

    return $parser->fixHtmlOutput($rawText);
}
}
<?php
declare(strict_types=1);
namespace voku\helper;
/**
 * Null-object counterpart of a dom element.
 *
 * <p>Every lookup returns another blank object, an empty string, false or null,
 * and every mutator does nothing, so callers can keep chaining without a node.</p>
 *
 * @noinspection PhpHierarchyChecksInspection
 */
class SimpleHtmlDomBlank extends AbstractSimpleHtmlDom implements \IteratorAggregate, SimpleHtmlDomInterface
{
    /**
     * A blank element never wraps a real node.
     *
     * @var null
     */
    protected $node;

    /**
     * Retrieve an external iterator.
     *
     * @see http://php.net/manual/en/iteratoraggregate.getiterator.php
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function getIterator(): SimpleHtmlDomNodeInterface
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * A blank element has no children.
     *
     * @param int $idx <p>Ignored.</p>
     *
     * @return null
     */
    public function childNodes(int $idx = -1)
    {
        return null;
    }

    /**
     * Find list of nodes with a CSS selector; nothing can match here.
     *
     * @param string   $selector <p>Ignored.</p>
     * @param int|null $idx      <p>Ignored.</p>
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function find(string $selector, $idx = null)
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * Find one node with a CSS selector; nothing can match here.
     *
     * @param string $selector <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     *                                <p>Always a new blank element.</p>
     */
    public function findOne(string $selector): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * Find nodes with a CSS selector; nothing can match here.
     *
     * @param string $selector <p>Ignored.</p>
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function findMulti(string $selector): SimpleHtmlDomNodeInterface
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * A blank element has no first child.
     *
     * @return null
     */
    public function firstChild()
    {
        return null;
    }

    /**
     * A blank element has no attributes.
     *
     * @return null
     */
    public function getAllAttributes()
    {
        return null;
    }

    /**
     * Return attribute value.
     *
     * @param string $name <p>Ignored.</p>
     *
     * @return string
     *                <p>Always an empty string.</p>
     */
    public function getAttribute(string $name): string
    {
        return '';
    }

    /**
     * Return element by #id.
     *
     * @param string $id <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     *                                <p>Always a new blank element.</p>
     */
    public function getElementById(string $id): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * Returns elements by #id.
     *
     * @param string   $id  <p>Ignored.</p>
     * @param int|null $idx <p>Ignored.</p>
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function getElementsById(string $id, $idx = null)
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * Return elements by .class.
     *
     * @param string $class <p>Ignored.</p>
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * Return element by tag name.
     *
     * @param string $name <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     *                                <p>Always a new blank element.</p>
     */
    public function getElementByTagName(string $name): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * Returns elements by tag name.
     *
     * @param string   $name <p>Ignored.</p>
     * @param int|null $idx  <p>Ignored.</p>
     *
     * @return SimpleHtmlDomNodeInterface
     *                                    <p>Always a new blank node list.</p>
     */
    public function getElementsByTagName(string $name, $idx = null)
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * Create a new "HtmlDomParser"-object from the current (blank) context.
     *
     * @return HtmlDomParser
     */
    public function getHtmlDomParser(): HtmlDomParser
    {
        return new HtmlDomParser($this);
    }

    /**
     * Return a stand-in node, as there is no real one.
     *
     * @return \DOMNode
     *                  <p>Always a freshly created, detached DOMNode.</p>
     */
    public function getNode(): \DOMNode
    {
        return new \DOMNode();
    }

    /**
     * Determine if an attribute exists on the element.
     *
     * @param string $name <p>Ignored.</p>
     *
     * @return bool
     *              <p>Always false.</p>
     */
    public function hasAttribute(string $name): bool
    {
        return false;
    }

    /**
     * Get dom node's outer html.
     *
     * @param bool $multiDecodeNewHtmlEntity <p>Ignored.</p>
     *
     * @return string
     *                <p>Always an empty string.</p>
     */
    public function html(bool $multiDecodeNewHtmlEntity = false): string
    {
        return '';
    }

    /**
     * Get dom node's inner html.
     *
     * @param bool $multiDecodeNewHtmlEntity <p>Ignored.</p>
     *
     * @return string
     *                <p>Always an empty string.</p>
     */
    public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
    {
        return '';
    }

    /**
     * A blank element has no last child.
     *
     * @return null
     */
    public function lastChild()
    {
        return null;
    }

    /**
     * A blank element has no next sibling.
     *
     * @return null
     */
    public function nextSibling()
    {
        return null;
    }

    /**
     * Returns the parent of node.
     *
     * @return SimpleHtmlDomInterface
     *                                <p>Always a new blank element.</p>
     */
    public function parentNode(): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * Nodes can get partially destroyed in which they're still an
     * actual DOM node (such as \DOMElement) but almost their entire
     * body is gone, including the `nodeType` attribute.
     *
     * @return bool
     *              <p>Always true: a blank element is reported as removed.</p>
     */
    public function isRemoved(): bool
    {
        return true;
    }

    /**
     * A blank element has no previous sibling.
     *
     * @return null
     */
    public function previousSibling()
    {
        return null;
    }

    /**
     * Set attribute value; a no-op on a blank element.
     *
     * @param string      $name   <p>Ignored.</p>
     * @param string|null $value  <p>Ignored.</p>
     * @param bool        $strict <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     *                                <p>The current instance, to allow chaining.</p>
     */
    public function setAttribute(string $name, $value = null, bool $strict = false): SimpleHtmlDomInterface
    {
        return $this;
    }

    /**
     * Get or set a form value; a blank element neither has nor stores one.
     *
     * @param string|string[]|null $value <p>Ignored.</p>
     *
     * @return null
     */
    public function val($value = null)
    {
        return null;
    }

    /**
     * Remove attribute; a no-op on a blank element.
     *
     * @param string $name <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     *                                <p>The current instance, to allow chaining.</p>
     */
    public function removeAttribute(string $name): SimpleHtmlDomInterface
    {
        return $this;
    }

    /**
     * Get dom node's plain text.
     *
     * @return string
     *                <p>Always an empty string.</p>
     */
    public function text(): string
    {
        return '';
    }

    /**
     * Forward calls made through an alias name to the method the alias stands for.
     *
     * <p>The lower-cased $name is looked up in "self::$functionAliases".</p>
     *
     * @param string $name      <p>The called method name (case-insensitive).</p>
     * @param array  $arguments <p>The arguments passed on to the aliased method.</p>
     *
     * @throws \BadMethodCallException
     *                                 <p>If $name is not a known alias.</p>
     *
     * @return SimpleHtmlDomInterface|string|null
     */
    public function __call($name, $arguments)
    {
        $name = \strtolower($name);

        if (!isset(self::$functionAliases[$name])) {
            throw new \BadMethodCallException('Method does not exist');
        }

        $target = [$this, self::$functionAliases[$name]];

        return \call_user_func_array($target, $arguments);
    }

    /**
     * There is no node to replace; a new blank element is returned instead.
     *
     * @param string $string <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     */
    protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * There is no child to replace; a new blank element is returned instead.
     *
     * @param string $string <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     */
    protected function replaceChildWithString(string $string): SimpleHtmlDomInterface
    {
        return new static();
    }

    /**
     * There is no text to replace; a new blank element is returned instead.
     *
     * @param mixed $string <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface
     */
    protected function replaceTextWithString($string): SimpleHtmlDomInterface
    {
        return new static();
    }
}
<?php
namespace voku\helper;
/**
 * Contract for a single dom element wrapper: traversal (children, siblings,
 * parent), CSS-selector and id/class/tag lookups, attribute access, form
 * values and html / text output.
 *
 * @property string $outertext
 * <p>Get dom node's outer html (alias for "outerHtml").</p>
 * @property string $outerhtml
 * <p>Get dom node's outer html.</p>
 * @property string $innertext
 * <p>Get dom node's inner html (alias for "innerHtml").</p>
 * @property string $innerhtml
 * <p>Get dom node's inner html.</p>
 * @property string $plaintext
 * <p>Get dom node's plain text.</p>
 * @property-read string $tag
 * <p>Get dom node name.</p>
 * @property-read string $attr
 * <p>Get dom node attributes.</p>
 * @property-read string $text
 * <p>Get dom node's plain text (assumed alias for "text()" - TODO confirm against __get).</p>
 * @property-read string $html
 * <p>Get dom node's outer html.</p>
 *
 * @method SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null children() children($idx = -1)
 * <p>Returns children of node.</p>
 * @method SimpleHtmlDomInterface|null first_child()
 * <p>Returns the first child of node.</p>
 * @method SimpleHtmlDomInterface|null last_child()
 * <p>Returns the last child of node.</p>
 * @method SimpleHtmlDomInterface|null next_sibling()
 * <p>Returns the next sibling of node.</p>
 * @method SimpleHtmlDomInterface|null prev_sibling()
 * <p>Returns the previous sibling of node.</p>
 * @method SimpleHtmlDomInterface|null parent()
 * <p>Returns the parent of node.</p>
 * @method string outerText()
 * <p>Get dom node's outer html (alias for "outerHtml()").</p>
 * @method string outerHtml()
 * <p>Get dom node's outer html.</p>
 * @method string innerText()
 * <p>Get dom node's inner html (alias for "innerHtml()").</p>
 */
interface SimpleHtmlDomInterface extends \IteratorAggregate
{
/**
 * Returns children of node.
 *
 * @param int $idx <p>Defaults to -1.</p>
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null
 */
public function childNodes(int $idx = -1);
/**
 * Find list of nodes with a CSS selector.
 *
 * @param string $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function find(string $selector, $idx = null);
/**
 * Find one node with a CSS selector.
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomInterface
 */
public function findOne(string $selector): self;
/**
 * Find nodes with a CSS selector.
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function findMulti(string $selector): SimpleHtmlDomNodeInterface;
/**
 * Returns the first child of node.
 *
 * @return SimpleHtmlDomInterface|null
 */
public function firstChild();
/**
 * Returns an array of attributes.
 *
 * @return array|null
 */
public function getAllAttributes();
/**
 * Return attribute value.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return string
 */
public function getAttribute(string $name): string;
/**
 * Return element by #id.
 *
 * @param string $id
 *
 * @return SimpleHtmlDomInterface
 */
public function getElementById(string $id): self;
/**
 * Returns elements by #id.
 *
 * @param string $id
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function getElementsById(string $id, $idx = null);
/**
 * Return elements by .class.
 *
 * @param string $class
 *
 * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function getElementByClass(string $class);
/**
 * Return element by tag name.
 *
 * @param string $name
 *
 * @return SimpleHtmlDomInterface
 */
public function getElementByTagName(string $name): self;
/**
 * Returns elements by tag name.
 *
 * @param string $name
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function getElementsByTagName(string $name, $idx = null);
/**
 * Create a new "HtmlDomParser"-object from the current context.
 *
 * @return HtmlDomParser
 */
public function getHtmlDomParser(): HtmlDomParser;
/**
 * Retrieve an external iterator.
 *
 * @see http://php.net/manual/en/iteratoraggregate.getiterator.php
 *
 * @return SimpleHtmlDomNodeInterface
 * <p>
 * An instance of an object implementing <b>Iterator</b> or
 * <b>Traversable</b>
 * </p>
 */
public function getIterator(): SimpleHtmlDomNodeInterface;
/**
 * Return the underlying DOM node.
 *
 * @return \DOMNode
 */
public function getNode(): \DOMNode;
/**
 * Determine if an attribute exists on the element.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return bool
 */
public function hasAttribute(string $name): bool;
/**
 * Get dom node's outer html.
 *
 * @param bool $multiDecodeNewHtmlEntity
 *
 * @return string
 */
public function html(bool $multiDecodeNewHtmlEntity = false): string;
/**
 * Get dom node's inner html.
 *
 * @param bool $multiDecodeNewHtmlEntity
 *
 * @return string
 */
public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string;
/**
 * Returns the last child of node.
 *
 * @return SimpleHtmlDomInterface|null
 */
public function lastChild();
/**
 * Returns the next sibling of node.
 *
 * @return SimpleHtmlDomInterface|null
 */
public function nextSibling();
/**
 * Returns the parent of node.
 *
 * @return SimpleHtmlDomInterface
 */
public function parentNode(): self;
/**
 * Nodes can get partially destroyed in which they're still an
 * actual DOM node (such as \DOMElement) but almost their entire
 * body is gone, including the `nodeType` attribute.
 *
 * @return bool true if node has been destroyed
 */
public function isRemoved(): bool;
/**
 * Returns the previous sibling of node.
 *
 * @return SimpleHtmlDomInterface|null
 */
public function previousSibling();
/**
 * Set attribute value.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 * @param string|null $value <p>Set to NULL or empty string, to remove the attribute.</p>
 * @param bool $strict <p>
 * $value must be NULL, to remove the attribute,
 * so that you can set an empty string as attribute-value e.g. autofocus=""
 * </p>
 *
 * @return SimpleHtmlDomInterface
 */
public function setAttribute(string $name, $value = null, bool $strict = false): self;
/**
 * Get or set the value of a form element.
 *
 * @param string|string[]|null $value <p>
 * null === get the current input value
 * text === set a new input value
 * </p>
 *
 * @return string|string[]|null
 */
public function val($value = null);
/**
 * Remove attribute.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return SimpleHtmlDomInterface
 */
public function removeAttribute(string $name): self;
/**
 * Get dom node's plain text.
 *
 * @return string
 */
public function text(): string;
/**
 * Magic read access to the properties listed in the interface docblock.
 *
 * @param string $name
 *
 * @return array|string|null
 */
public function __get($name);
/**
 * Allows the object to be called like a function, with the same arguments
 * as "find()" (presumably forwarding to it - confirm in the implementation).
 *
 * @param string $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function __invoke($selector, $idx = null);
/**
 * Magic isset() support for the properties listed in the interface docblock.
 *
 * @param string $name
 *
 * @return bool
 */
public function __isset($name);
/**
 * Magic method dispatch, used for the aliases listed as "@method" in the
 * interface docblock.
 *
 * @param string $name
 * @param array $arguments
 *
 * @throws \BadMethodCallException
 *
 * @return SimpleHtmlDomInterface|string|null
 */
public function __call($name, $arguments);
/**
 * String representation of the element.
 *
 * @return string
 */
public function __toString();
}
<?php
declare(strict_types=1);
namespace voku\helper;
/**
 * A list of dom elements that can be searched and rendered as a whole.
 */
class SimpleHtmlDomNode extends AbstractSimpleHtmlDomNode implements SimpleHtmlDomNodeInterface
{
    /**
     * Run a CSS selector against every element of this list and merge the results.
     *
     * @param string   $selector <p>The CSS selector.</p>
     * @param int|null $idx      <p>
     *                           null === return all matches
     *                           int === return only the match at this position
     *                           (negative values count from the end)
     *                           </p>
     *
     * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null
     *                           <p>All matches (a blank list if there are none) when $idx is null,
     *                           otherwise the match at $idx or null if that position does not exist.</p>
     */
    public function find(string $selector, $idx = null)
    {
        $matches = new static();

        foreach ($this as $element) {
            foreach ($element->find($selector) as $match) {
                $matches->append($match);
            }
        }

        if ($idx === null) {
            // the whole result set was requested
            return \count($matches) === 0 ? new SimpleHtmlDomNodeBlank() : $matches;
        }

        // a negative position counts backwards from the end of the result set
        $position = $idx < 0 ? \count($matches) + $idx : $idx;

        return $matches[$position] ?? null;
    }

    /**
     * Return the first match of a CSS selector.
     *
     * @param string $selector <p>The CSS selector.</p>
     *
     * @return SimpleHtmlDomNodeInterface|null
     *                                         <p>The result of "find($selector, 0)".</p>
     */
    public function findOne(string $selector)
    {
        return $this->find($selector, 0);
    }

    /**
     * Return all matches of a CSS selector.
     *
     * @param string $selector <p>The CSS selector.</p>
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     *                                         <p>The result of "find($selector, null)".</p>
     */
    public function findMulti(string $selector): SimpleHtmlDomNodeInterface
    {
        return $this->find($selector, null);
    }

    /**
     * Get html of elements.
     *
     * @return string[]
     *                  <p>The "outertext" of every element, in list order.</p>
     */
    public function innerHtml(): array
    {
        return \array_map(
            static function ($element) {
                return $element->outertext;
            },
            \iterator_to_array($this, false)
        );
    }

    /**
     * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
     */
    public function innertext()
    {
        return $this->innerHtml();
    }

    /**
     * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
     */
    public function outertext()
    {
        return $this->innerHtml();
    }

    /**
     * Get plain text.
     *
     * @return string[]
     *                  <p>The "plaintext" of every element, in list order.</p>
     */
    public function text(): array
    {
        return \array_map(
            static function ($element) {
                return $element->plaintext;
            },
            \iterator_to_array($this, false)
        );
    }
}
<?php
declare(strict_types=1);
namespace voku\helper;
/**
 * Null-object counterpart of a dom element list: it never contains anything,
 * so every lookup comes back empty.
 */
class SimpleHtmlDomNodeBlank extends AbstractSimpleHtmlDomNode implements SimpleHtmlDomNodeInterface
{
    /**
     * Find list of nodes with a CSS selector; nothing can match here.
     *
     * @param string   $selector <p>Ignored.</p>
     * @param int|null $idx      <p>Ignored.</p>
     *
     * @return null
     */
    public function find(string $selector, $idx = null)
    {
        return null;
    }

    /**
     * Find one node with a CSS selector; nothing can match here.
     *
     * @param string $selector <p>Ignored.</p>
     *
     * @return null
     */
    public function findOne(string $selector)
    {
        return null;
    }

    /**
     * Find nodes with a CSS selector; nothing can match here.
     *
     * @param string $selector <p>Ignored.</p>
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     *                                         <p>Always a new blank list.</p>
     */
    public function findMulti(string $selector): SimpleHtmlDomNodeInterface
    {
        return new SimpleHtmlDomNodeBlank();
    }

    /**
     * Get html of elements.
     *
     * @return string[]
     *                  <p>Always an empty array.</p>
     */
    public function innerHtml(): array
    {
        return [];
    }

    /**
     * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
     *
     * @return string[]
     *                  <p>Always an empty array.</p>
     */
    public function innertext()
    {
        return [];
    }

    /**
     * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
     *
     * @return string[]
     *                  <p>Always an empty array.</p>
     */
    public function outertext()
    {
        return [];
    }

    /**
     * Get plain text.
     *
     * @return string[]
     *                  <p>Always an empty array.</p>
     */
    public function text(): array
    {
        return [];
    }
}
<?php
namespace voku\helper;
/**
 * Contract for a list of dom elements: CSS-selector lookups across all
 * elements and html / text output as arrays with one entry per element.
 *
 * @property-read string[] $outertext
 * <p>Get dom node's outer html.</p>
 * @property-read string[] $plaintext
 * <p>Get dom node's plain text.</p>
 */
interface SimpleHtmlDomNodeInterface extends \IteratorAggregate
{
/**
 * Find list of nodes with a CSS selector.
 *
 * @param string $selector
 * @param int|null $idx <p>Defaults to null.</p>
 *
 * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|null
 */
public function find(string $selector, $idx = null);
/**
 * Find one node with a CSS selector.
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomNode|null
 */
public function findOne(string $selector);
/**
 * Find nodes with a CSS selector.
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function findMulti(string $selector): self;
/**
 * Get html of elements.
 *
 * @return string[]
 */
public function innerHtml(): array;
/**
 * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
 */
public function innertext();
/**
 * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x)
 */
public function outertext();
/**
 * Get plain text.
 *
 * @return string[]
 */
public function text(): array;
/**
 * Magic read access to the properties listed in the interface docblock.
 *
 * @param string $name
 *
 * @return array|null
 */
public function __get($name);
/**
 * Allows the list to be called like a function, with the same arguments
 * as "find()" (presumably forwarding to it - confirm in the implementation).
 *
 * @param string $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null
 */
public function __invoke($selector, $idx = null);
/**
 * String representation of the list.
 *
 * @return string
 */
public function __toString();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment