diff --git a/.gitattributes b/.gitattributes
index 0ecaca90..0839efd8 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,7 +1,11 @@
-/demo export-ignore
-/tests export-ignore
-.gitattributes export-ignore
-.gitignore export-ignore
-.travis.yml export-ignore
-composer.lock export-ignore
-phpunit.xml export-ignore
+/demo export-ignore
+/tests export-ignore
+/scripts export-ignore
+/.github export-ignore
+.gitattributes export-ignore
+.gitignore export-ignore
+.php_cs.dist export-ignore
+.travis.yml export-ignore
+phpunit.xml.dist export-ignore
+/tests/cache/4pda.to.2022-12-04-406834-sostoyalsya_reliz_clown_of_duty_parodii_na_call_of_duty.php working-tree-encoding=windows-1251 diff=windows-1251
+/tests/cache/www.itmedia.co.jp.news-articles-2410-28-news159.html.php working-tree-encoding=sjis diff=sjis
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..13871fc1
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,3 @@
+github: oscarotero
+patreon: misteroom
+custom: "https://paypal.me/oscarotero"
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 00000000..9e6af5d3
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,70 @@
+name: "testing"
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  tests:
+    name: Tests
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        php:  # quoted so each version stays a string
+          - "7.4"
+          - "8.0"  # unquoted 8.0 is a YAML float and loses its trailing zero
+          - "8.1"
+          - "8.2"
+          - "8.3"
+          - "8.4"
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install PHP
+        uses: shivammathur/setup-php@v2
+        with:
+          php-version: ${{ matrix.php }}
+
+      - name: Cache PHP dependencies
+        uses: actions/cache@v4
+        with:
+          path: vendor
+          key: ${{ runner.os }}-php-${{ matrix.php }}-composer-${{ hashFiles('**/composer.json') }}
+          restore-keys: ${{ runner.os }}-php-${{ matrix.php }}-composer-
+
+      - name: Install dependencies
+        run: composer install
+
+      - name: Tests
+        run: composer test
+
+  phpstan:
+    name: PHPStan Static Analysis
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install PHP
+        uses: shivammathur/setup-php@v2
+        with:
+          php-version: "8.4"
+
+      - name: Cache PHP dependencies
+        uses: actions/cache@v4
+        with:
+          path: vendor
+          key: ${{ runner.os }}-php-8.4-composer-${{ hashFiles('**/composer.json') }}
+          restore-keys: ${{ runner.os }}-php-8.4-composer-
+
+      - name: Install dependencies
+        run: composer install
+
+      - name: Run PHPStan
+        run: composer phpstan
diff --git a/.gitignore b/.gitignore
index f317d741..29e4a489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
-# COMPOSER
-vendor
+*.cache
+*.code-workspace
composer.lock
+env.php
+phpunit.xml
+vendor
diff --git a/.php_cs.dist b/.php_cs.dist
new file mode 100644
index 00000000..31a8de65
--- /dev/null
+++ b/.php_cs.dist
@@ -0,0 +1,13 @@
+<?php
+
+return PhpCsFixer\Config::create()
+    ->setFinder(
+ PhpCsFixer\Finder::create()
+ ->files()
+ ->name('*.php')
+ ->in(__DIR__.'/src')
+ ->in(__DIR__.'/demo')
+ ->in(__DIR__.'/tests')
+ ->exclude('cache')
+ ->exclude('fixtures')
+ );
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 1ecfee99..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-language: php
-sudo: false
-
-php:
- - 5.4
- - 5.5
- - 5.6
- - 7.0
- - hhvm
-
-matrix:
- allow_failures:
- - php: hhvm
-
-before_install:
- - composer install
-
-script:
- - 'phpunit'
- - './vendor/bin/php7cc ./src'
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..5f4b497f
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,302 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+## [4.4.17] - 2025-05-13
+### Fixed
+- Adapters hostname detection [#556].
+
+## [4.4.16] - 2025-05-09
+### Fixed
+- Adapters hostname detection [#555].
+
+## [4.4.15] - 2025-01-02
+### Fixed
+- Type bug [#553].
+
+## [4.4.14] - 2024-12-04
+### Fixed
+- Php 8.4 support [#551].
+
+## [4.4.13] - 2024-11-21
+### Fixed
+- Php 8.4 support [#548].
+
+## [4.4.12] - 2024-07-24
+### Fixed
+- X.com (Twitter) [#540]
+- Updated oembed resources.
+
+## [4.4.11] - 2024-06-10
+### Fixed
+- Updated oEmbed entry points [#537]
+
+## [4.4.10] - 2023-12-10
+### Fixed
+- PHP 7.4 support
+- Use correct method for string length [#529]
+
+## [4.4.9] - 2023-12-01
+### Fixed
+- Performance and memory leak issues [#525], [#527].
+
+## [4.4.8] - 2023-05-22
+### Fixed
+- Support for `psr/http-message@2` [#514], [#515]
+
+## [4.4.7] - 2022-12-12
+### Fixed
+- Href attributes with `undefined` values [#501], [#502]
+- Deprecated warning for var interpolation in PHP 8.2 [#506]
+- Prevent unsupported operand types exception [#507]
+
+## [4.4.6] - 2022-10-02
+### Fixed
+- Some code issues detected by phpstan: [#495], [#496], [#497], [#498].
+- Fix for quotation marks in redirect URL [#499]
+
+## [4.4.5] - 2022-09-06
+### Fixed
+- Updated oembed endpoints [#494]
+
+## [4.4.4] - 2022-04-13
+### Fixed
+- Error getting data from Linked data [#481].
+
+## [4.4.3] - 2022-03-13
+### Fixed
+- PHP 8.1 deprecation notice [#480].
+
+## [4.4.2] - 2022-02-13
+### Added
+- Options to customize the CurlClient to perform http queries [#474].
+
+## [4.4.1] - 2022-02-06
+### Fixed
+- PHP 8.1 deprecation notice [#473].
+
+## [4.4.0] - 2022-01-08
+### Added
+- New settings option `twitter:token` to use Twitter API to get the data [#364] [#468].
+
+### Fixed
+- Headers not sent properly by curl [#466], [#467].
+
+## [4.3.5] - 2021-10-10
+### Fixed
+- Updated oEmbed endpoints
+- Fixed embed code for Instagram [#456], [#459]
+
+### Security
+- Fixed a possible XML Quadratic Blowup vulnerability.
+
+## [4.3.4] - 2021-06-22
+### Fixed
+- Urls of images should include the same url for the `$info->image` value. [#452]
+
+## [4.3.3] - 2021-06-22
+### Fixed
+- Facebook embed redirects to `/login`. [#450], [#451]
+
+## [4.3.2] - 2021-04-04
+### Fixed
+- Add configured oEmbed query parameters to all oEmbed endpoints [#437]
+- Updated oEmbed endpoints.
+- Replaced Travis with Github workflows for testing
+
+## [4.3.1] - 2021-03-21
+### Added
+- Support for binary files (video, audio, images, etc) [#412] [#413]
+
+### Fixed
+- Oembed for facebook photos [#405] [#406]
+- Oembed for facebook videos [#432] [#433]
+- Added more ways to detect data using meta tags [#427]
+- Bandcamp provider name [#429] [#430]
+
+## [4.3.0] - 2020-11-04
+### Added
+- New function `$embed->setSettings()` to pass the settings before getting the site info
+
+### Fixed
+- PHP 8 compatibility [#394]
+- Facebook and Instagram adapted to the new API changes [#392] [#399]
+
+## [4.2.7] - 2020-09-23
+### Added
+- New option `twitch:parent` to fix Twitch embed with iframes [#384]
+
+### Fixed
+- Added `datePublished` check to `PublishedTime` extractor [#385] [#386]
+- Added `@property-read` for IDE support [#387] [#388]
+
+## [4.2.6] - 2020-08-28
+### Fixed
+- Code width and height when the provided value is not numeric (ex: 100%) [#380]
+
+## [4.2.5] - 2020-08-01
+### Fixed
+- Github TypeError exception with some urls [#375]
+
+## [4.2.4] - 2020-07-06
+### Fixed
+- Ignore invalid urls instead of throwing an exception
+- Updated oembed list of endpoints
+
+## [4.2.3] - 2020-06-12
+### Fixed
+- Support for other non-latin alphabets such as Persian or Arabic [#366]
+
+## [4.2.2] - 2020-05-31
+### Fixed
+- Provided a fallback for oEmbed compatible sites like Instagram that redirect to the login page [#357]
+
+## [4.2.1] - 2020-05-25
+### Fixed
+- Redirect urls like `t.co`.
+
+## [4.2.0] - 2020-05-23
+### Added
+- Added the `ignored_errors` setting to ignore some curl errors instead of throwing an exception [#355]
+- Support for Twitch embeds [#332]
+
+### Fixed
+- Ignored linkedData errors [#356]
+
+## [4.1.1] - 2020-04-24
+### Added
+- Updated oembed endpoints from `oembed.com`
+- Add support for tiktok.com
+
+## [4.1.0] - 2020-04-19
+### Added
+- Ability to send settings to `CurlClient`. Added the `cookies_path` setting to customize the file used for cookies. [#345]
+- `Document::selectCss()` function to select elements using css selectors instead of xpath (it requires `symfony/css-selector`)
+- `Document::removeCss()` function to remove elements using css selectors instead of xpath (it requires `symfony/css-selector`)
+- Ability to configure OEmbed parameters from the outside using the `oembed:query_parameters` setting [#346]
+
+## [4.0.0] - 2020-03-13
+Full library refactoring.
+
+### Added
+- Support for multiple parallel request with `curl_multi`
+- Support for PSR-7 Http Messages, PSR-17 Http Factories and PSR-18 Http Client
+- `cms` value
+- `language` to detect the page language
+- `languages` to detect urls to versions in different languages
+- `favicon` to detect small favicons (16 or 32px)
+- `icon` to detect big icons (from 48px)
+
+### Changed
+- Changed providers (oEmbed, Html, OpenGraph etc) by independent detectors (title, url, language etc).
+- The `tags` value is renamed to `keywords`
+- Use Psr standards instead of custom interfaces.
+- Improved tests using cached responses.
+
+### Removed
+- Support for PHP<7.4
+- `type` value (it was very confusing)
+- `images` value
+- `providerImage` (use `favicon` or `icon` instead)
+- Support for files (pdf, jpg, video, etc).
+
+[#332]: https://github.com/oscarotero/Embed/issues/332
+[#345]: https://github.com/oscarotero/Embed/issues/345
+[#346]: https://github.com/oscarotero/Embed/issues/346
+[#355]: https://github.com/oscarotero/Embed/issues/355
+[#356]: https://github.com/oscarotero/Embed/issues/356
+[#357]: https://github.com/oscarotero/Embed/issues/357
+[#364]: https://github.com/oscarotero/Embed/issues/364
+[#366]: https://github.com/oscarotero/Embed/issues/366
+[#375]: https://github.com/oscarotero/Embed/issues/375
+[#380]: https://github.com/oscarotero/Embed/issues/380
+[#384]: https://github.com/oscarotero/Embed/issues/384
+[#385]: https://github.com/oscarotero/Embed/issues/385
+[#386]: https://github.com/oscarotero/Embed/issues/386
+[#387]: https://github.com/oscarotero/Embed/issues/387
+[#388]: https://github.com/oscarotero/Embed/issues/388
+[#392]: https://github.com/oscarotero/Embed/issues/392
+[#394]: https://github.com/oscarotero/Embed/issues/394
+[#399]: https://github.com/oscarotero/Embed/issues/399
+[#405]: https://github.com/oscarotero/Embed/issues/405
+[#406]: https://github.com/oscarotero/Embed/issues/406
+[#412]: https://github.com/oscarotero/Embed/issues/412
+[#413]: https://github.com/oscarotero/Embed/issues/413
+[#427]: https://github.com/oscarotero/Embed/issues/427
+[#429]: https://github.com/oscarotero/Embed/issues/429
+[#430]: https://github.com/oscarotero/Embed/issues/430
+[#432]: https://github.com/oscarotero/Embed/issues/432
+[#433]: https://github.com/oscarotero/Embed/issues/433
+[#437]: https://github.com/oscarotero/Embed/issues/437
+[#450]: https://github.com/oscarotero/Embed/issues/450
+[#451]: https://github.com/oscarotero/Embed/issues/451
+[#452]: https://github.com/oscarotero/Embed/issues/452
+[#456]: https://github.com/oscarotero/Embed/issues/456
+[#459]: https://github.com/oscarotero/Embed/issues/459
+[#466]: https://github.com/oscarotero/Embed/issues/466
+[#467]: https://github.com/oscarotero/Embed/issues/467
+[#468]: https://github.com/oscarotero/Embed/issues/468
+[#473]: https://github.com/oscarotero/Embed/issues/473
+[#474]: https://github.com/oscarotero/Embed/issues/474
+[#480]: https://github.com/oscarotero/Embed/issues/480
+[#481]: https://github.com/oscarotero/Embed/issues/481
+[#494]: https://github.com/oscarotero/Embed/issues/494
+[#495]: https://github.com/oscarotero/Embed/issues/495
+[#496]: https://github.com/oscarotero/Embed/issues/496
+[#497]: https://github.com/oscarotero/Embed/issues/497
+[#498]: https://github.com/oscarotero/Embed/issues/498
+[#499]: https://github.com/oscarotero/Embed/issues/499
+[#501]: https://github.com/oscarotero/Embed/issues/501
+[#502]: https://github.com/oscarotero/Embed/issues/502
+[#506]: https://github.com/oscarotero/Embed/issues/506
+[#507]: https://github.com/oscarotero/Embed/issues/507
+[#514]: https://github.com/oscarotero/Embed/issues/514
+[#515]: https://github.com/oscarotero/Embed/issues/515
+[#525]: https://github.com/oscarotero/Embed/issues/525
+[#527]: https://github.com/oscarotero/Embed/issues/527
+[#529]: https://github.com/oscarotero/Embed/issues/529
+[#537]: https://github.com/oscarotero/Embed/issues/537
+[#540]: https://github.com/oscarotero/Embed/issues/540
+[#548]: https://github.com/oscarotero/Embed/issues/548
+[#551]: https://github.com/oscarotero/Embed/issues/551
+[#553]: https://github.com/oscarotero/Embed/issues/553
+[#555]: https://github.com/oscarotero/Embed/issues/555
+[#556]: https://github.com/oscarotero/Embed/issues/556
+
+[4.4.17]: https://github.com/oscarotero/Embed/compare/v4.4.16...v4.4.17
+[4.4.16]: https://github.com/oscarotero/Embed/compare/v4.4.15...v4.4.16
+[4.4.15]: https://github.com/oscarotero/Embed/compare/v4.4.14...v4.4.15
+[4.4.14]: https://github.com/oscarotero/Embed/compare/v4.4.13...v4.4.14
+[4.4.13]: https://github.com/oscarotero/Embed/compare/v4.4.12...v4.4.13
+[4.4.12]: https://github.com/oscarotero/Embed/compare/v4.4.11...v4.4.12
+[4.4.11]: https://github.com/oscarotero/Embed/compare/v4.4.10...v4.4.11
+[4.4.10]: https://github.com/oscarotero/Embed/compare/v4.4.9...v4.4.10
+[4.4.9]: https://github.com/oscarotero/Embed/compare/v4.4.8...v4.4.9
+[4.4.8]: https://github.com/oscarotero/Embed/compare/v4.4.7...v4.4.8
+[4.4.7]: https://github.com/oscarotero/Embed/compare/v4.4.6...v4.4.7
+[4.4.6]: https://github.com/oscarotero/Embed/compare/v4.4.5...v4.4.6
+[4.4.5]: https://github.com/oscarotero/Embed/compare/v4.4.4...v4.4.5
+[4.4.4]: https://github.com/oscarotero/Embed/compare/v4.4.3...v4.4.4
+[4.4.3]: https://github.com/oscarotero/Embed/compare/v4.4.2...v4.4.3
+[4.4.2]: https://github.com/oscarotero/Embed/compare/v4.4.1...v4.4.2
+[4.4.1]: https://github.com/oscarotero/Embed/compare/v4.4.0...v4.4.1
+[4.4.0]: https://github.com/oscarotero/Embed/compare/v4.3.5...v4.4.0
+[4.3.5]: https://github.com/oscarotero/Embed/compare/v4.3.4...v4.3.5
+[4.3.4]: https://github.com/oscarotero/Embed/compare/v4.3.3...v4.3.4
+[4.3.3]: https://github.com/oscarotero/Embed/compare/v4.3.2...v4.3.3
+[4.3.2]: https://github.com/oscarotero/Embed/compare/v4.3.1...v4.3.2
+[4.3.1]: https://github.com/oscarotero/Embed/compare/v4.3.0...v4.3.1
+[4.3.0]: https://github.com/oscarotero/Embed/compare/v4.2.7...v4.3.0
+[4.2.7]: https://github.com/oscarotero/Embed/compare/v4.2.6...v4.2.7
+[4.2.6]: https://github.com/oscarotero/Embed/compare/v4.2.5...v4.2.6
+[4.2.5]: https://github.com/oscarotero/Embed/compare/v4.2.4...v4.2.5
+[4.2.4]: https://github.com/oscarotero/Embed/compare/v4.2.3...v4.2.4
+[4.2.3]: https://github.com/oscarotero/Embed/compare/v4.2.2...v4.2.3
+[4.2.2]: https://github.com/oscarotero/Embed/compare/v4.2.1...v4.2.2
+[4.2.1]: https://github.com/oscarotero/Embed/compare/v4.2.0...v4.2.1
+[4.2.0]: https://github.com/oscarotero/Embed/compare/v4.1.1...v4.2.0
+[4.1.1]: https://github.com/oscarotero/Embed/compare/v4.1.0...v4.1.1
+[4.1.0]: https://github.com/oscarotero/Embed/compare/v4.0.0...v4.1.0
+[4.0.0]: https://github.com/oscarotero/Embed/releases/tag/v4.0.0
diff --git a/LICENSE b/LICENSE
index c811c59a..2385321a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2015 Oscar Otero Marzoa
+Copyright (c) 2017 Oscar Otero Marzoa
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index acf26eba..538a0c46 100644
--- a/README.md
+++ b/README.md
@@ -1,286 +1,360 @@
-# Embed
+# Embed
-[](https://travis-ci.org/oscarotero/Embed)
-[](https://scrutinizer-ci.com/g/oscarotero/Embed/)
-[](https://www.versioneye.com/php/embed:embed/references)
-[](https://packagist.org/packages/embed/embed)
-[](https://packagist.org/packages/embed/embed)
-[](https://packagist.org/packages/embed/embed)
-[](https://packagist.org/packages/embed/embed)
-[](https://www.gratipay.com/oscarotero/)
-[](https://insight.sensiolabs.com/projects/f0beab9f-fe41-47db-8806-373f80c50f9e)
+[![Latest Version on Packagist][ico-version]][link-packagist]
+[![Total Downloads][ico-downloads]][link-packagist]
+[![Monthly Downloads][ico-m-downloads]][link-packagist]
+[![Software License][ico-license]](LICENSE)
PHP library to get information from any web page (using oembed, opengraph, twitter-cards, scrapping the html, etc). It's compatible with any web service (youtube, vimeo, flickr, instagram, etc) and has adapters to some sites like (archive.org, github, facebook, etc).
Requirements:
-* PHP 5.4+
+* PHP 7.4+
* Curl library installed
+* PSR-17 implementation. By default these libraries are detected automatically:
+ * [laminas/laminas-diactoros](https://github.com/laminas/laminas-diactoros)
+ * [guzzle/psr7](https://github.com/guzzle/psr7)
+ * [nyholm/psr7](https://github.com/Nyholm/psr7)
+ * [sunrise/http-message](https://github.com/sunrise-php/http-message)
-If you need PHP 5.3 support, use the 1.x version (but not maintained anymore)
+> If you need PHP 5.5-7.3 support, [use the 3.x version](https://github.com/oscarotero/Embed/tree/v3.x)
## Online demo
-http://oscarotero.com/embed2/demo
+Run `php -S localhost:8888 demo/index.php`
+
+## Video Tutorial
+ [](https://youtu.be/4YCLRpKY1cs)
+
+
+## Installation
+
+This package is installable and autoloadable via Composer as [embed/embed](https://packagist.org/packages/embed/embed).
+
+```
+$ composer require embed/embed
+```
## Usage
```php
-//Load library (if you don't have composer or any psr-4 compatible loader):
-include('src/autoloader.php');
+use Embed\Embed;
+
+$embed = new Embed();
//Load any url:
-$info = Embed\Embed::create('https://www.youtube.com/watch?v=PP1xn5wHtxE');
+$info = $embed->get('https://www.youtube.com/watch?v=PP1xn5wHtxE');
//Get content info
$info->title; //The page title
$info->description; //The page description
$info->url; //The canonical url
-$info->type; //The page type (link, video, image, rich)
-$info->tags; //The page keywords (tags)
+$info->keywords; //The page keywords
-$info->images; //List of all images found in the page
-$info->image; //The image choosen as main image
-$info->imageWidth; //The width of the main image
-$info->imageHeight; //The height of the main image
+$info->image; //The thumbnail or main image
-$info->code; //The code to embed the image, video, etc
-$info->width; //The width of the embed code
-$info->height; //The height of the embed code
-$info->aspectRatio; //The aspect ratio (width/height)
+$info->code->html; //The code to embed the image, video, etc
+$info->code->width; //The exact width of the embed code (if exists)
+$info->code->height; //The exact height of the embed code (if exists)
+$info->code->ratio; //The percentage of height / width to emulate the aspect ratio using paddings.
-$info->authorName; //The (video/article/image/whatever) author
+$info->authorName; //The resource author
$info->authorUrl; //The author url
-$info->providerName; //The provider name of the page (youtube, twitter, instagram, etc)
+$info->cms; //The cms used
+$info->language; //The language of the page
+$info->languages; //The alternative languages
+
+$info->providerName; //The provider name of the page (Youtube, Twitter, Instagram, etc)
$info->providerUrl; //The provider url
-$info->providerIcons; //All provider icons found in the page
-$info->providerIcon; //The icon choosen as main icon
+$info->icon; //The big icon of the site
+$info->favicon; //The favicon of the site (an .ico file or a png with up to 32x32px)
-$info->publishedDate; //The (video/article/image/whatever) published date
+$info->publishedTime; //The published time of the resource
+$info->license; //The license url of the resource
+$info->feeds; //The RSS/Atom feeds
```
-## Customization
+## Parallel multiple requests
-You can set some options using an array as second argument. In this array you can configure the adapters, providers, resolvers, etc.
+```php
+use Embed\Embed;
-### The adapter
+$embed = new Embed();
-The adapter is the class that get all information of the page from the providers and choose the best result for each value. For example, a page can provide multiple titles from opengraph, twitter cards, oembed, the `
` html element, etc, so the adapter get all this titles and choose the best one.
+//Load multiple urls asynchronously:
+$infos = $embed->getMulti(
+    'https://www.youtube.com/watch?v=PP1xn5wHtxE',
+    'https://twitter.com/carlosmeixidefl/status/1230894146220625933',
+    'https://en.wikipedia.org/wiki/Tordoia',
+);
-Embed has an generic adapter called "Webpage" to use in any web but has also some specific adapters for sites like archive.org, facebook, google, github, spotify, etc, that provides information using their own apis, or have any other special issue.
+foreach ($infos as $info) {
+ echo $info->title;
+}
+```
-You can configure these adapters and even create your own adapter, that must implement the `Embed\Adapters\AdapterInterface`.
+## Document
+The document is the object that stores the html code of the page. You can use it to extract extra info from the html code:
-The available options for the adapters are:
+```php
+//Get the document object
+$document = $info->getDocument();
-* minImageWidth (int): Minimal image width used to choose the main image
-* minImageHeight (int): Minimal image height used to choose the main image
-* imagesBlacklist (array): Images that you don't want to be used. Could be plain text or [Url](https://github.com/oscarotero/Embed/blob/master/src/Url.php) match pattern.
-* getBiggerImage (bool): Choose the bigger image as the main image (instead the first found, that usually is the most relevant).
-* getBiggerIcon (bool): The same than getBiggerImage but used to choose the main icon
+$document->link('image_src'); //Returns the href of a <link>
+$document->getDocument(); //Returns the DOMDocument instance
+$html = (string) $document; //Returns the html code
-```php
-$config = [
- 'adapter' => [
- 'class' => 'MyCustomClass', //Your custom adapter
-
- 'config' => [
- 'minImageWidth' => 16,
- 'minImageHeight' => 16,
- 'imagesBlacklist' => null,
- 'getBiggerImage' => false,
- 'getBiggerIcon' => false,
- ]
- ]
-];
+$document->select('.//h1'); //Search
```
-### The providers
+You can perform xpath queries in order to select specific elements. A search always returns an instance of `Embed\QueryResult`:
-The providers get the data from different sources. Each source has it's own provider. For example, there is a provider for open graph, other for twitter cards, for oembed, html, etc. The providers that receive options are:
-
-#### oembed
+```php
+//Search the A elements
+$result = $document->select('.//a');
-Used to get data from oembed api if it's available. It accepts two options:
+//Filter the results
+$result->filter(fn ($node) => $node->getAttribute('href'));
-* parameters (array): Extra query parameters to send with the oembed request
-* embedlyKey (string): If it's defined, use embed.ly api as fallback oembed provider.
-* iframelyKey (string): If it's defined, use iframe.ly api as fallback oembed provider.
+$id = $result->str('id'); //Return the id of the first result as string
+$text = $result->str(); //Return the content of the first result
-#### html
+$ids = $result->strAll('id'); //Return an array with the ids of all results as string
+$texts = $result->strAll(); //Return an array with the content of all results as string
-Used to get data directly from the html code of the page:
+$tabindex = $result->int('tabindex'); //Return the tabindex attribute of the first result as integer
+$number = $result->int(); //Return the content of the first result as integer
-* maxImages (int): Max number of images fetched from the html code (searching for the `` elements). By default is -1 (no limit). Use 0 to no get images.
+$href = $result->url('href'); //Return the href attribute of the first result as url (converts relative urls to absolutes)
+$url = $result->url(); //Return the content of the first result as url
-#### facebook
+$node = $result->node(); //Return the first node found (DOMElement)
+$nodes = $result->nodes(); //Return all nodes found
+```
-This provider is used only for facebook pages, to get information from the [graph api](https://developers.facebook.com/docs/graph-api)
+## Metas
-* key (string): the key used
+For convenience, the object `Metas` stores the value of all `<meta>` elements located in the html, so you can get the values more easily. The key of every meta is taken from the `name`, `property` or `itemprop` attributes and the value is taken from `content`.
-#### google
+```php
+//Get the Metas object
+$metas = $info->getMetas();
+
+$metas->all(); //Return all values
+$metas->get('og:title'); //Return a key value
+$metas->str('og:title'); //Return the value as string (remove html tags)
+$metas->html('og:description'); //Return the value as html
+$metas->int('og:video:width'); //Return the value as integer
+$metas->url('og:url'); //Return the value as full url (converts relative urls to absolutes)
+```
-This provider is used only for google maps, to generate the embed code [using the embed api](https://developers.google.com/maps/documentation/embed/)
+## OEmbed
-* key (string): the key used
+In addition to the html and metas, this library uses [oEmbed](https://oembed.com/) endpoints to get additional data. You can get this data as follows:
-#### soundcloud
+```php
+//Get the oEmbed object
+$oembed = $info->getOEmbed();
+
+$oembed->all(); //Return all raw data
+$oembed->get('title'); //Return a key value
+$oembed->str('title'); //Return the value as string (remove html tags)
+$oembed->html('html'); //Return the value as html
+$oembed->int('width'); //Return the value as integer
+$oembed->url('url'); //Return the value as full url (converts relative urls to absolutes)
+```
-Used only for soundcloud pages, to get information using its api.
+Additional oEmbed parameters (like Instagram's `hidecaption`) can also be provided:
+```php
+$embed = new Embed();
-* key (string): to get info from soundcloud API.
+$info = $embed->get('https://www.instagram.com/p/B_C0wheCa4V/');
+$info->setSettings([
+    'oembed:query_parameters' => ['hidecaption' => true]
+]);
+$oembed = $info->getOEmbed();
+```
+## LinkedData
-### The request resolver
+Another API available by default, used to extract info using the [JsonLD](https://www.w3.org/TR/json-ld/) schema.
-Embed uses the `Embed\RequestResolvers\Curl` class to resolve all requests using the curl library. You can set options to the curl request or use your custom resolver creating a class implementing the `Embed\RequestResolvers\RequestResolverInterface`.
+```php
+//Get the linkedData object
+$ld = $info->getLinkedData();
+
+$ld->all(); //Return all data
+$ld->get('name'); //Return a key value
+$ld->str('name'); //Return the value as string (remove html tags)
+$ld->html('description'); //Return the value as html
+$ld->int('width'); //Return the value as integer
+$ld->url('url'); //Return the value as full url (converts relative urls to absolutes)
+```
-The resolver configuration is defined under the "resolver" key and it has two options:
+## Other APIs
-* class: Your custom class name if you want to use your own implementation
-* config: The options passed to the class. If you use the default curl class, the config are the same than the [curl_setopt PHP function](http://php.net/manual/en/function.curl-setopt.php)
+Some sites like Wikipedia or Archive.org provide a custom API that is used to fetch more reliable data. You can get the API object with the method `getApi()` but note that not all results have this method. The Api object has the same methods as oEmbed:
```php
-// CURL
-$config = [
- 'resolver' => [
- 'class' => 'Embed\\RequestResolvers\\Curl', // The default resolver used
-
- 'config' => [
- CURLOPT_MAXREDIRS => 20,
- CURLOPT_CONNECTTIMEOUT => 10,
- CURLOPT_TIMEOUT => 10,
- CURLOPT_SSL_VERIFYPEER => false,
- CURLOPT_SSL_VERIFYHOST => false,
- CURLOPT_ENCODING => '',
- CURLOPT_AUTOREFERER => true,
- CURLOPT_USERAGENT => 'Embed PHP Library',
- CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
- ]
- ]
-];
-
-// Guzzle (5.x)
-$config = [
- 'resolver' => [
- 'class' => 'Embed\\RequestResolvers\\Guzzle5', // Guzzle5 resolver used
-
- 'config' => [
- // optional: if you need to use your custom Guzzle instance
- 'client' => $myGuzzleClient,
- ]
- ]
-];
+//Get the API object
+$api = $info->getApi();
+
+$api->all(); //Return all raw data
+$api->get('title'); //Return a key value
+$api->str('title'); //Return the value as string (remove html tags)
+$api->html('html'); //Return the value as html
+$api->int('width'); //Return the value as integer
+$api->url('url'); //Return the value as full url (converts relative urls to absolutes)
```
-[You can see here](https://github.com/oscarotero/Embed/tree/master/src/RequestResolvers) the RequestResolvers included.
+## Extending Embed
-### Image info
+Depending on your needs, you may want to extend this library with extra features or change the way it performs some operations.
-To check the images and get their mimetype and dimmensions, we have the class `Embed\ImageInfo\Curl`. This class uses curl to make request, get the first bytes to get the image type and dimmensions and close the connection. So the image wont be downloaded entirely, just until the downloaded data is enought to get this information.
+### PSR
-Like the resolver class, you can provide your own image class (it must implement the `Embed\ImageInfo\ImageInfoInterface`) and/or change the configuration. The available options are the same:
+Embed uses some PSR standards to be as interoperable as possible:
-* class: Your custom class name if you want to use your own implementation
-* config: The options passed to the class. If you use the default curl class, the config are the same than the [curl_setopt PHP function](http://php.net/manual/en/function.curl-setopt.php)
+- [PSR-7](https://www.php-fig.org/psr/psr-7/) Standard interfaces to represent http requests, responses and uris
+- [PSR-17](https://www.php-fig.org/psr/psr-17/) Standard factories to create PSR-7 objects
+- [PSR-18](https://www.php-fig.org/psr/psr-18/) Standard interface to send a http request and return a response
+Embed comes with a CURL client compatible with PSR-18 but you need to install a PSR-7 / PSR-17 library. [Here you can see a list of popular libraries](https://github.com/middlewares/awesome-psr15-middlewares#psr-7-implementations) and the library can detect automatically 'laminas\diactoros', 'guzzleHttp\psr7', 'slim\psr7', 'nyholm\psr7' and 'sunrise\http' (in this order). If you want to use a different PSR implementation, you can do it in this way:
```php
-//CURL
-$config = [
- 'image' => [
- 'class' => 'Embed\\ImageInfo\\Curl', //The default imageInfo used
-
- 'config' => [
- CURLOPT_MAXREDIRS => 20,
- CURLOPT_CONNECTTIMEOUT => 10,
- CURLOPT_TIMEOUT => 10,
- CURLOPT_SSL_VERIFYPEER => false,
- CURLOPT_SSL_VERIFYHOST => false,
- CURLOPT_ENCODING => '',
- CURLOPT_AUTOREFERER => true,
- CURLOPT_USERAGENT => 'Embed PHP Library',
- CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
- ]
- ]
-];
-
-// Guzzle (5.x)
-$config = [
- 'image' => [
- 'class' => 'Embed\\ImageInfo\\Guzzle5',
-
- 'config' => [
- 'client' => $myGuzzleClient,
- ]
- ]
-];
+use Embed\Embed;
+use Embed\Http\Crawler;
+
+$client = new CustomHttpClient();
+$requestFactory = new CustomRequestFactory();
+$uriFactory = new CustomUriFactory();
+
+//The Crawler is responsible for performing http queries
+$crawler = new Crawler($client, $requestFactory, $uriFactory);
+
+//Create an embed instance passing the Crawler
+$embed = new Embed($crawler);
```
-[You can see here](https://github.com/oscarotero/Embed/tree/master/src/ImageInfo) the ImageInfo implementations included.
+### Adapters
+
+There are some sites with special needs: because they provide public APIs that allow extracting more info (like Wikipedia or Archive.org) or because we need to change how the data is extracted for that particular site. For all these cases we have the adapters, which are classes extending the default classes to provide extra functionality.
+
+Before creating an adapter, you need to understand how Embed works: when you execute this code, you get an `Extractor` instance
+```php
+//Get the Extractor with all info
+$info = $embed->get($url);
+
+//The extractor has document and oembed:
+$document = $info->getDocument();
+$oembed = $info->getOEmbed();
+```
-### Configuration example
+The `Extractor` class has many `Detectors`. Each detector is responsible to detect a specific piece of info. For example, there's a detector for the title, other for description, image, code, etc.
+So, an adapter is basically an extractor created specifically for a site. It can also contain custom detectors or apis. If you look at the `src/Adapters` folder you can see all adapters.
+
+If you create an adapter, you also need to register it with Embed, so it knows which website to use it for. To do that, there's the `ExtractorFactory` object, which is responsible for instantiating the right extractor for each site.
```php
-$config = [
- 'adapter' => [
- 'config' => [
- 'minImageWidth' => 16,
- 'minImageHeight' => 16,
- 'imagesBlacklist' => [
-            'http://example.com/full/path/to/image.jpg',
- 'http?://test.*/*.png/',
- '*/bad_image.gif'
- ]
- ]
- ],
- 'providers' => [
- 'oembed' => [
- 'parameters' => [],
- 'embedlyKey' => null
- ],
- 'html' => [
- 'maxImages' => 3
- ],
- 'facebook' => [
- 'key' => 'our-access-token'
- ]
- ],
- 'resolver' => [
- 'config' => [
- CURLOPT_USERAGENT => 'My spider',
- CURLOPT_MAXREDIRS => 3
- ]
- ]
- 'image' => [
- 'class' => 'App\\MyImageInfoClass'
- ]
-];
+use Embed\Embed;
+
+$embed = new Embed();
+
+$factory = $embed->getExtractorFactory();
+
+//Use this MySite adapter for mysite.com
+$factory->addAdapter('mysite.com', MySite::class);
+
+//Remove the adapter for pinterest.com, so it will use the default extractor
+$factory->removeAdapter('pinterest.com');
+
+//Change the default extractor
+$factory->setDefault(CustomExtractor::class);
```
-### Access to more data
+### Detectors
-As said before, the adapter get the data from all providers and choose the best values. But you can get the data directly from the providers, useful if you want to get the specific value returned by any provider.
+Embed comes with several predefined detectors, but you may want to change or add more. Just create a class extending `Embed\Detectors\Detector` class and register it in the extractor factory. For example:
```php
use Embed\Embed;
+use Embed\Detectors\Detector;
+
+class Robots extends Detector
+{
+ public function detect(): ?string
+ {
+ $response = $this->extractor->getResponse();
+ $metas = $this->extractor->getMetas();
+
+ return $response->getHeaderLine('x-robots-tag')
+ ?: $metas->str('robots');
+ }
+}
+
+//Register the detector
+$embed = new Embed();
+$embed->getExtractorFactory()->addDetector('robots', Robots::class);
+
+//Use it
+$info = $embed->get('http://example.com/');
+$robots = $info->robots;
+```
-//Get the info
-$info = Embed::create('https://www.youtube.com/watch?v=PP1xn5wHtxE');
+### Settings
-//Get the oembed provider
-$oembed = $info->getProvider('oembed');
+If you need to pass settings to the CurlClient to perform http queries:
-//Get the oembed title:
-echo $oembed->getTitle();
+```php
+use Embed\Embed;
+use Embed\Http\Crawler;
+use Embed\Http\CurlClient;
+
+$client = new CurlClient();
+$client->setSettings([
+ 'cookies_path' => $cookies_path,
+ 'ignored_errors' => [18],
+ 'max_redirs' => 3, // see CURLOPT_MAXREDIRS
+ 'connect_timeout' => 2, // see CURLOPT_CONNECTTIMEOUT
+ 'timeout' => 2, // see CURLOPT_TIMEOUT
+ 'ssl_verify_host' => 2, // see CURLOPT_SSL_VERIFYHOST
+ 'ssl_verify_peer' => 1, // see CURLOPT_SSL_VERIFYPEER
+ 'follow_location' => true, // see CURLOPT_FOLLOWLOCATION
+ 'user_agent' => 'Mozilla', // see CURLOPT_USERAGENT
+]);
+
+$embed = new Embed(new Crawler($client));
+```
+
+If you need to pass settings to your detectors, you can add settings to the `ExtractorFactory`:
-//Get any value returned by oembed api
-echo $oembed->bag->get('author_name');
+```php
+use Embed\Embed;
+
+$embed = new Embed();
+$embed->setSettings([
+ 'oembed:query_parameters' => [], //Extra parameters sent to oembed
+ 'twitch:parent' => 'example.com', //Required to embed twitch videos as iframe
+ 'facebook:token' => '1234|5678', //Required to embed content from Facebook
+ 'instagram:token' => '1234|5678', //Required to embed content from Instagram
+ 'twitter:token' => 'asdf', //Improve the data from twitter
+]);
+$info = $embed->get($url);
```
+
+Note: The built-in detectors do not require settings. This feature is only for convenience if you create a specific detector that requires settings.
+
+---
+
+[ico-version]: https://poser.pugx.org/embed/embed/v/stable
+[ico-license]: https://poser.pugx.org/embed/embed/license
+[ico-downloads]: https://poser.pugx.org/embed/embed/downloads
+[ico-m-downloads]: https://poser.pugx.org/embed/embed/d/monthly
+
+[link-packagist]: https://packagist.org/packages/embed/embed
diff --git a/composer.json b/composer.json
index 7ba59288..993f65e5 100644
--- a/composer.json
+++ b/composer.json
@@ -2,7 +2,13 @@
"name": "embed/embed",
"type": "library",
"description": "PHP library to retrieve page info using oembed, opengraph, etc",
- "keywords": ["oembed", "opengraph", "twitter cards", "embed", "embedly"],
+ "keywords": [
+ "oembed",
+ "opengraph",
+ "twitter cards",
+ "embed",
+ "embedly"
+ ],
"homepage": "https://github.com/oscarotero/Embed",
"license": "MIT",
"authors": [
@@ -18,20 +24,52 @@
"issues": "https://github.com/oscarotero/Embed/issues"
},
"require": {
- "php": ">=5.4.0",
- "ext-curl": "*"
+ "php": "^7.4|^8",
+ "ext-curl": "*",
+ "ext-dom": "*",
+ "ext-json": "*",
+ "ext-mbstring": "*",
+ "composer/ca-bundle": "^1.0",
+ "oscarotero/html-parser": "^0.1.4",
+ "psr/http-message": "^1.0|^2.0",
+ "psr/http-client": "^1.0",
+ "psr/http-factory": "^1.0",
+ "ml/json-ld": "^1.1"
},
"require-dev": {
- "guzzlehttp/guzzle": "5.x",
- "sstalle/php7cc": "^1.0",
- "phpunit/phpunit": "*"
+ "phpunit/phpunit": "^9.0",
+ "friendsofphp/php-cs-fixer": "^2.0",
+ "nyholm/psr7": "^1.2",
+ "oscarotero/php-cs-fixer-config": "^1.0",
+ "brick/varexporter": "^0.3.1",
+ "symfony/css-selector": "^5.0",
+ "phpstan/phpstan": "^2.1",
+ "phpstan/phpstan-strict-rules": "^2.0"
},
"suggest": {
- "guzzlehttp/guzzle@5.x": "To use Guzzle5 request resolver"
+ "symfony/css-selector": "If you want to get elements using css selectors"
},
"autoload": {
"psr-4": {
"Embed\\": "src"
+ },
+ "files": [
+ "src/functions.php"
+ ]
+ },
+ "autoload-dev": {
+ "psr-4": {
+ "Embed\\Tests\\": "tests/"
}
+ },
+ "scripts": {
+ "demo": "php -S localhost:8888 demo/index.php",
+ "test": "phpunit",
+ "cs-fix": "php-cs-fixer fix",
+ "phpstan": "phpstan --memory-limit=-1",
+ "update-resources": [
+ "php scripts/update-oembed.php",
+ "php scripts/update-suffix.php"
+ ]
}
}
diff --git a/demo/index.php b/demo/index.php
index 6fb9c454..850aba98 100644
--- a/demo/index.php
+++ b/demo/index.php
@@ -2,43 +2,70 @@
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');
-include '../src/autoloader.php';
+include __DIR__.'/../vendor/autoload.php';
-function get($name, $default = '')
+function getUrl(): ?string
{
- if (!isset($_GET[$name])) {
- return $default;
+ $skipParams = ['url', 'settings'];
+ $url = getParam('url');
+
+ if (!$url) {
+ return null;
}
- if ($name === 'url') {
- if (!filter_var($_GET['url'], FILTER_VALIDATE_URL)) {
- return 'http://donottrytoxss.invalid/';
+ //fix for unescaped urls
+ foreach ($_GET as $name => $value) {
+ if (in_array($name, $skipParams, true)) {
+ continue;
}
+
+ $url .= "&{$name}={$value}";
}
- return $_GET[$name];
+ return $url;
+}
+
+function getParam(string $paramName): ?string
+{
+ return $_GET[$paramName] ?? null;
}
-function getEscaped($name, $default = '')
+function getJsonSettings(): array
{
- return htmlspecialchars(get($name, $default), ENT_QUOTES, 'UTF-8');
+ $jsonString = getParam('settings') ?: '{}';
+ return json_decode($jsonString, true, 512, JSON_THROW_ON_ERROR);
}
-function printAny($text)
+function getEscapedUrl(): ?string
+{
+ $url = getUrl();
+ return $url ? htmlspecialchars($url, ENT_QUOTES, 'UTF-8') : null;
+}
+
+function printAny($text): void
{
if (is_array($text)) {
printArray($text);
} else {
- printText($text);
+ printText((string) $text);
+ }
+}
+
+function printText(?string $text): void
+{
+ if ($text) {
+ echo htmlspecialchars($text, ENT_IGNORE);
}
}
-function printText($text)
+function printDatetime(?DateTimeInterface $date): void
{
- echo htmlspecialchars($text, ENT_IGNORE);
+ if ($date) {
+ echo $date->format('Y-m-d H:i:s');
+ }
}
-function printImage($image)
+function printImage(?string $image): void
{
if ($image) {
echo <<'.htmlspecialchars(print_r($array, true), ENT_IGNORE).'';
}
}
-function printCode($code, $asHtml = true)
+function printHeaders(array $array): void
+{
+ $headers = [];
+
+ foreach ($array as $name => $values) {
+ $headers[$name] = implode(', ', $values);
+ }
+
+ printArray($headers);
+}
+
+function printCode(?string $code, bool $asHtml = true): void
{
if ($asHtml) {
echo $code;
@@ -75,47 +113,25 @@ function printCode($code, $asHtml = true)
}
}
-$providerData = [
- 'title' => 'printText',
- 'description' => 'printText',
- 'url' => 'printUrl',
- 'type' => 'printText',
- 'tags' => 'printArray',
- 'imagesUrls' => 'printArray',
- 'code' => 'printCode',
- 'source' => 'printUrl',
- 'width' => 'printText',
- 'height' => 'printText',
- 'authorName' => 'printText',
- 'authorUrl' => 'printUrl',
- 'providerIconsUrls' => 'printArray',
- 'providerName' => 'printText',
- 'providerUrl' => 'printUrl',
- 'publishedTime' => 'printText',
-];
-
-$adapterData = [
+$detectors = [
'title' => 'printText',
'description' => 'printText',
'url' => 'printUrl',
- 'type' => 'printText',
- 'tags' => 'printArray',
+ 'keywords' => 'printArray',
'image' => 'printImage',
- 'imageWidth' => 'printText',
- 'imageHeight' => 'printText',
- 'images' => 'printArray',
'code' => 'printCode',
- 'source' => 'printUrl',
- 'width' => 'printText',
- 'height' => 'printText',
- 'aspectRatio' => 'printText',
+ 'feeds' => 'printArray',
'authorName' => 'printText',
'authorUrl' => 'printUrl',
- 'providerIcon' => 'printImage',
- 'providerIcons' => 'printArray',
+ 'icon' => 'printImage',
+ 'favicon' => 'printImage',
'providerName' => 'printText',
'providerUrl' => 'printUrl',
- 'publishedTime' => 'printText',
+ 'publishedTime' => 'printDatetime',
+ 'license' => 'printUrl',
+ 'cms' => 'printText',
+ 'language' => 'printText',
+ 'languages' => 'printArray',
];
?>
@@ -127,7 +143,38 @@ function printCode($code, $asHtml = true)
Embed tests
-
+
@@ -135,95 +182,134 @@ function printCode($code, $asHtml = true)
-
+
-
-
+
+