Skip to content

Commit

Permalink
Merge pull request #6 from prezly-forks/fix/http-headers-lowercase
Browse files Browse the repository at this point in the history
Fixed code to work with both uppercase and lowercase HTTP header names
  • Loading branch information
mpclarkson authored Mar 21, 2017
2 parents e9de2d3 + 7e84584 commit c969d84
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 25 deletions.
17 changes: 16 additions & 1 deletion src/IconScraper/DataAccess.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,23 @@ public function retrieveUrl($url) {

/**
 * Fetches the HTTP response headers for a URL with all header names lower-cased.
 *
 * Header names are case-insensitive, so keys are normalised to lower case
 * ("Location" and "location" both become "location"). Numeric keys hold the
 * status lines, one per response in the redirect chain.
 *
 * @param string $url Address to request.
 *
 * @return array Lower-cased header map; an empty array when the request failed.
 *               When several redirects were followed, "location" is collapsed
 *               to the last absolute URL, or removed if none was absolute.
 */
public function retrieveHeader($url) {
    // Defined elsewhere in this class; presumably configures the stream
    // context used by get_headers() - confirm against setContext().
    $this->setContext();

    // get_headers already follows redirects and stacks headers up in array
    $headers = @get_headers($url, TRUE);

    // get_headers() returns false on failure; array_change_key_case() must
    // only ever see an array (it throws a TypeError on PHP 8 otherwise).
    if (!is_array($headers)) {
        return [];
    }

    $headers = array_change_key_case($headers);

    // if there were multiple redirects flatten down the location header
    if (isset($headers['location']) && is_array($headers['location'])) {
        $absolute = array_filter($headers['location'], function ($header) {
            // leave only absolute urls
            return is_string($header) && strpos($header, '://') !== false;
        });

        if (!empty($absolute)) {
            $headers['location'] = end($absolute);
        } else {
            // end() on an empty array yields false, which would still pass
            // isset() in callers and replace their URL with false.
            unset($headers['location']);
        }
    }

    return $headers;
}


public function saveCache($file, $data) {
file_put_contents($file, $data);
Expand Down
39 changes: 15 additions & 24 deletions src/IconScraper/Scraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,41 +64,32 @@ public static function baseUrl($url, $path = false)
return $return;
}

/**
 * Resolves the final HTTP status code and URL for an address.
 *
 * Redirects are followed by the data-access layer, so a single header lookup
 * is enough: the last status line is the status of the final response, and a
 * "location" header (if present) is the URL that was redirected to.
 *
 * @param string $url Address to inspect.
 *
 * @return array|false ['status' => string, 'url' => string], or false when
 *                     the URL is empty, the lookup failed, or no parsable
 *                     status line was returned.
 */
public function info($url) {
    if (empty($url)) {
        return false;
    }

    $headers = $this->dataAccess->retrieveHeader($url);

    // A failed lookup may come back as false/null rather than an array.
    if (!is_array($headers)) {
        return false;
    }

    // leaves only numeric keys
    $status_lines = array_filter($headers, function ($key) {
        return is_int($key);
    }, ARRAY_FILTER_USE_KEY);

    // uses last returned status line header
    $status_line = end($status_lines);
    if (!is_string($status_line)) {
        return false;
    }

    // A status line looks like "HTTP/1.1 200 OK"; index 1 is the code.
    $exploded = explode(' ', $status_line);
    if (!array_key_exists(1, $exploded)) {
        return false;
    }

    $status = $exploded[1];

    // Only accept a usable redirect target; anything else keeps the input URL.
    if (isset($headers['location']) && is_string($headers['location']) && $headers['location'] !== '') {
        $url = $headers['location'];
    }

    return ['status' => $status, 'url' => $url];
}

/**
Expand Down

0 comments on commit c969d84

Please sign in to comment.