Skip to content

Commit

Permalink
consertado workflow
Browse files Browse the repository at this point in the history
adicionado withWebDriver no ScraPHPBuilder
passado logs para classe scraphp
adicionado builder para scraphp
  • Loading branch information
rodrigoaramburu committed Dec 17, 2023
1 parent 9b0242a commit 555dbce
Show file tree
Hide file tree
Showing 33 changed files with 674 additions and 551 deletions.
14 changes: 11 additions & 3 deletions .github/workflows/ci.yml → .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,20 @@ jobs:

- name: Download dependencies
run: composer update --no-interaction --no-progress


- uses: isbang/compose-action@v1.5.1
with:
compose-file: "./docker-compose.yml"
down-flags: "--volumes"
services: |
php
selenium
- name: Sleep to wait for Selenium
  shell: bash
  run: |
    # Keep the sleep in the foreground. With a trailing `&` the shell returns
    # immediately, so the step would finish without waiting and the tests could
    # start before the Selenium container is ready.
    sleep 30
- name: Run tests
run: ./vendor/bin/pest
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@


![example workflow](https://github.com/rodrigoaramburu/scraphp/actions/workflows/main.yml/badge.svg)

# Executar Selenium

Expand Down
6 changes: 6 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,11 @@
"allow-plugins": {
"pestphp/pest-plugin": true
}
},
"scripts": {
"lint": ["pint"],
"test": ["pest"],
"test:lint": ["pint --test"],
"test:stan": ["phpstan analyse"]
}
}
2 changes: 1 addition & 1 deletion tests/docker-compose.yml → docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ services:
ports:
- 8000:8000
volumes:
- './test-pages:/application'
- './tests/test-pages:/application'

selenium:
image: selenium/standalone-chrome:latest
Expand Down
File renamed without changes.
21 changes: 8 additions & 13 deletions src/HttpClient/AssetFetcher.php
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
<?php
<?php

declare(strict_types=1);

namespace ScraPHP\HttpClient;

use Psr\Log\LoggerInterface;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use ScraPHP\Exceptions\HttpClientException;
use ScraPHP\Exceptions\AssetNotFoundException;
use ScraPHP\Exceptions\HttpClientException;

final class AssetFetcher
{

private \GuzzleHttp\Client $client;

public function __construct(
private LoggerInterface $logger,
){
public function __construct()
{
$this->client = new \GuzzleHttp\Client();
}

/**
* Fetches an asset from the given URL.
*
Expand All @@ -31,18 +29,15 @@ public function __construct(
public function fetchAsset(string $url): string
{
    try {
        $response = $this->client->request('GET', $url);
    } catch (ClientException $e) {
        // A 404 gets its own exception type so callers can tell a missing
        // asset apart from every other failure.
        if ($e->getCode() === 404) {
            throw new AssetNotFoundException($url.' not found');
        }

        // Any other client error used to fall through to the return below with
        // $response never assigned. Report it as an HTTP client failure instead,
        // keeping the original exception as the previous one.
        throw new HttpClientException($e->getMessage(), $e->getCode(), $e);
    } catch (ConnectException $e) {
        // Connection-level failures are wrapped the same way.
        throw new HttpClientException($e->getMessage(), $e->getCode(), $e);
    }

    // The body is read out into a plain string for the caller.
    return $response->getBody()->getContents();
}
}
}
4 changes: 2 additions & 2 deletions src/HttpClient/FilteredElement.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<?php
<?php

declare(strict_types=1);

namespace ScraPHP\HttpClient;
Expand All @@ -12,5 +13,4 @@ public function attr(string $attr): ?string;
public function filterCSS(string $cssSelector): ?FilteredElement;

public function filterCSSEach(string $cssSelector, callable $callback): array;

}
7 changes: 3 additions & 4 deletions src/HttpClient/Guzzle/GuzzleFilteredElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
namespace ScraPHP\HttpClient\Guzzle;

use ScraPHP\HttpClient\FilteredElement;
use ScraPHP\HttpClient\HtmlElement;
use Symfony\Component\DomCrawler\Crawler;

final class GuzzleFilteredElement implements FilteredElement
{

public function __construct(private Crawler $crawler)
{
}
Expand All @@ -36,7 +34,8 @@ public function attr(string $attr): ?string
return $this->crawler->attr($attr);
}

public function filterCSS(string $cssSelector): ?FilteredElement{
public function filterCSS(string $cssSelector): ?FilteredElement
{
$crawler = $this->crawler->filter($cssSelector);
if ($crawler->count() === 0) {
return null;
Expand All @@ -46,7 +45,7 @@ public function filterCSS(string $cssSelector): ?FilteredElement{
}

public function filterCSSEach(string $cssSelector, callable $callback): array
{
{
$filter = $this->crawler->filter($cssSelector);

return $filter->each(static function (Crawler $crawler, int $i) use ($callback) {
Expand Down
28 changes: 11 additions & 17 deletions src/HttpClient/Guzzle/GuzzleHttpClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,31 @@

namespace ScraPHP\HttpClient\Guzzle;

use Psr\Log\LoggerInterface;
use ScraPHP\HttpClient\Page;
use ScraPHP\HttpClient\HttpClient;
use ScraPHP\HttpClient\AssetFetcher;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use Psr\Log\LoggerInterface;
use ScraPHP\Exceptions\AssetNotFoundException;
use ScraPHP\Exceptions\HttpClientException;
use ScraPHP\Exceptions\UrlNotFoundException;
use ScraPHP\Exceptions\AssetNotFoundException;
use ScraPHP\HttpClient\AssetFetcher;
use ScraPHP\HttpClient\HttpClient;
use ScraPHP\HttpClient\Page;

final class GuzzleHttpClient implements HttpClient
{
private \GuzzleHttp\Client $client;

private AssetFetcher $assetFetcher;

/**
* Constructor for the class.
*
* @param LoggerInterface $logger The logger instance.
* @param LoggerInterface $logger The logger instance.
*/
public function __construct(
private LoggerInterface $logger,

)
public function __construct()
{
$this->client = new \GuzzleHttp\Client();
$this->assetFetcher = new AssetFetcher($this->logger);
$this->assetFetcher = new AssetFetcher();
}

/**
Expand All @@ -45,15 +43,12 @@ public function __construct(
public function get(string $url): Page
{
try {
$this->logger->info('Accessing '.$url);
$response = $this->client->request('GET', $url);
$this->logger->info('Status: '.$response->getStatusCode().' '.$url);
} catch (ClientException $e) {
if ($e->getCode() === 404) {
$this->logger->error('404 NOT FOUND '.$url);
throw new UrlNotFoundException($url.' not found');
}
} catch(ConnectException $e) {
} catch (ConnectException $e) {
throw new HttpClientException($e->getMessage(), $e->getCode(), $e);
}

Expand All @@ -77,5 +72,4 @@ public function fetchAsset(string $url): string
{
return $this->assetFetcher->fetchAsset($url);
}

}
16 changes: 10 additions & 6 deletions src/HttpClient/Guzzle/GuzzlePage.php
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<?php
<?php

declare(strict_types=1);

namespace ScraPHP\HttpClient\Guzzle;

use ScraPHP\HttpClient\Page;
use ScraPHP\HttpClient\FilteredElement;
use ScraPHP\HttpClient\Page;
use Symfony\Component\DomCrawler\Crawler;

final class GuzzlePage implements Page
Expand All @@ -15,26 +15,29 @@ public function __construct(
private int $statusCode,
private string $content,
private array $headers
)
{
) {
}

/**
 * Returns the status code this page object was constructed with.
 */
public function statusCode(): int
{
return $this->statusCode;
}

/**
 * Returns the URL stored on this page object.
 */
public function url(): string
{
return $this->url;
}

/**
 * Returns the content string this page object was constructed with, unmodified.
 */
public function htmlBody(): string
{
return $this->content;
}

/**
 * Returns the full headers array this page object was constructed with.
 */
public function headers(): array
{
return $this->headers;
}

public function header(string $key): array
{
return $this->headers[$key] ?? [];
Expand All @@ -50,14 +53,15 @@ public function filterCSS(string $cssSelector): ?FilteredElement

return new GuzzleFilteredElement(crawler: $crawler);
}

public function filterCSSEach(string $cssSelector, callable $callback): array
{
    // Parse the stored content and narrow it to the nodes matching the selector.
    $matches = (new Crawler($this->content))->filter($cssSelector);

    // Each match is wrapped in a GuzzleFilteredElement and passed to the callback
    // together with its index; the callback results are returned as an array.
    return $matches->each(
        static fn (Crawler $node, int $index) => $callback(new GuzzleFilteredElement(crawler: $node), $index)
    );
}
}
}
3 changes: 0 additions & 3 deletions src/HttpClient/HttpClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@

namespace ScraPHP\HttpClient;

use ScraPHP\HttpClient\Page;

/**
 * Contract implemented by the HTTP clients in this package.
 */
interface HttpClient
{
/**
 * Retrieves the page at the given URL.
 *
 * NOTE(review): the Guzzle implementation throws UrlNotFoundException on a 404
 * and HttpClientException on a connection failure; confirm whether every
 * implementation is expected to do the same.
 */
public function get(string $url): Page;

/**
 * Fetches the asset at the given URL and returns it as a string.
 */
public function fetchAsset(string $url): string;

}
11 changes: 7 additions & 4 deletions src/HttpClient/Page.php
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
<?php
<?php

declare(strict_types=1);

namespace ScraPHP\HttpClient;

use ScraPHP\HttpClient\FilteredElement;

/**
 * A retrieved page: its status code, URL, headers and body, plus
 * CSS-selector access to elements inside it.
 */
interface Page
{
    /**
     * Status code of the page.
     */
    public function statusCode(): int;

    /**
     * URL of the page.
     */
    public function url(): string;

    /**
     * Body of the page as a string.
     */
    public function htmlBody(): string;

    /**
     * All headers of the page.
     */
    public function headers(): array;

    /**
     * Values of a single header.
     *
     * NOTE(review): GuzzlePage returns an empty array when the key is absent;
     * confirm other implementations behave the same way.
     */
    public function header(string $key): array;

    /**
     * Looks up an element by CSS selector.
     *
     * The return type is nullable, presumably for the case where nothing
     * matches the selector; confirm against the implementations.
     */
    public function filterCSS(string $cssSelector): ?FilteredElement;

    /**
     * Runs the callback for elements matching the CSS selector and returns
     * the collected results.
     *
     * GuzzlePage calls the callback with a FilteredElement and the integer
     * index of the match.
     */
    public function filterCSSEach(string $cssSelector, callable $callback): array;
}
18 changes: 10 additions & 8 deletions src/HttpClient/WebDriver/WebDriverFilteredElement.php
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
<?php
<?php

declare(strict_types=1);

namespace ScraPHP\HttpClient\WebDriver;

use Facebook\WebDriver\Exception\NoSuchElementException;
use Facebook\WebDriver\Remote\RemoteWebElement;
use Facebook\WebDriver\WebDriverBy;
use ScraPHP\HttpClient\FilteredElement;
use Facebook\WebDriver\Remote\RemoteWebElement;
use Facebook\WebDriver\Exception\NoSuchElementException;

final class WebDriverFilteredElement implements FilteredElement
{

public function __construct(
private RemoteWebElement $remoteWebElement
){}
) {
}

public function text(): string
{
Expand All @@ -28,13 +28,14 @@ public function attr(string $attr): ?string

public function filterCSS(string $cssSelector): ?FilteredElement
{
try{
try {
$remoteWebElement = $this->remoteWebElement->findElement(
WebDriverBy::cssSelector($cssSelector)
);
}catch (NoSuchElementException $exception){
} catch (NoSuchElementException $exception) {
return null;
}

return new WebDriverFilteredElement(
remoteWebElement: $remoteWebElement
);
Expand All @@ -48,6 +49,7 @@ public function filterCSSEach(string $cssSelector, callable $callback): array
foreach ($elements as $key => $element) {
$data[] = $callback(new WebDriverFilteredElement(remoteWebElement: $element), $key);
}

return $data;
}
}
}
Loading

0 comments on commit 555dbce

Please sign in to comment.