Skip to content

Commit 3aa5eea

Browse files
authored
Merge pull request #48 from ingenerator/share-useful-components
Share some more useful components
2 parents f338cd5 + 5e2d22f commit 3aa5eea

13 files changed

+946
-2
lines changed

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ README text eol=lf
2929
*.sqlite binary
3030

3131
# Ignore paths that should not be included in an archive (eg for a distribution version)
32-
/test
32+
/test export-ignore

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
### Unreleased
22

3+
### v1.18.0 (2022-12-15)
4+
5+
* Fix - the /test directory was misconfigured in .gitattributes and was not actually excluded from the package.
6+
7+
* Add a ConsistentStringScrambler - Consistently "randomise" the words in a string to be the same for the same random seed value using a Seeded Fisher-Yates shuffle
8+
9+
* Add classes to simplify the creation and parsing of XML sitemaps
10+
11+
* Add method to factory a DateTimeImmutable from a strict format and throw if it doesn't comply
12+
13+
* Add TemporaryDirectoryManager to simplify the creation and destruction of temporary directories
14+
315
### v1.17.2 (2022-10-31)
416

517
* Update `JSON::decode` to throw an explicit exception on NULL input

src/DateTime/DateTimeImmutableFactory.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ public static function atMicrotime(float $microtime): DateTimeImmutable
4848
return $dt->setTimezone(new \DateTimeZone(\date_default_timezone_get()));
4949
}
5050

51-
5251
/**
5352
* Create from a unix timestamp (in seconds) in the current timezone
5453
*
@@ -127,4 +126,14 @@ public static function fromYmdHis(string $input): DateTimeImmutable
127126
throw new \InvalidArgumentException($input.' is not in the format Y-m-d H:i:s');
128127
}
129128

129+
/**
 * Parse a date/time string that must match the given format exactly.
 *
 * The value is parsed with the format prefixed by `!`, then rendered back with the same
 * format: it is only accepted if the round trip reproduces the input byte-for-byte. This
 * rejects values PHP would otherwise silently normalise (for example an out-of-range day
 * rolling over into the next month).
 *
 * @param string $value  the date/time string to parse
 * @param string $format a format string as understood by DateTimeImmutable::createFromFormat
 *
 * @return \DateTimeImmutable
 * @throws \InvalidArgumentException if the value cannot be parsed or does not round-trip
 */
public static function fromStrictFormat(string $value, string $format): \DateTimeImmutable
{
    $parsed = \DateTimeImmutable::createFromFormat('!'.$format, $value);

    // Guard clause: reject both outright parse failures and values that were "corrected"
    if (($parsed === FALSE) || ($parsed->format($format) !== $value)) {
        throw new \InvalidArgumentException("`$value` is not a valid date/time in the format `$format`");
    }

    return $parsed;
}
138+
130139
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?php
2+
3+
namespace Ingenerator\PHPUtils\Filesystem;
4+
5+
6+
use InvalidArgumentException;
7+
use function array_keys;
8+
use function escapeshellarg;
9+
10+
/**
 * Creates uniquely-named temporary directories beneath a base directory and removes them
 * again - either explicitly via cleanup() or automatically when the manager is destroyed.
 */
class TemporaryDirectoryManager
{
    /**
     * Directories created by this instance that have not yet been removed. The path is
     * the array key (the value is always TRUE) so that lookups and removals are cheap.
     *
     * @var array<string, bool>
     */
    protected array $directories = [];

    /**
     * @param string $base_dir directory beneath which the temporary directories are created
     */
    public function __construct(protected string $base_dir)
    {
    }

    /**
     * Create a temporary directory with a unique name under this prefix. The directory
     * will be deleted when the manager is destroyed, if it still exists. Does not include trailing slash!
     *
     * @param string $prefix prepended to the generated unique directory name
     *
     * @return string path of the new directory
     * @throws \RuntimeException if the directory could not be created
     */
    public function mkTemp(string $prefix): string
    {
        $path = $this->base_dir.'/'.\uniqid($prefix, TRUE);

        try {
            if ( ! mkdir($path, 0700, TRUE) && ! is_dir($path)) {
                throw new \ErrorException('Could not make '.$path.': reason unknown');
            }
        } catch (\ErrorException $e) {
            // NOTE(review): presumably this also catches mkdir() warnings that a project-level
            // error handler has converted to ErrorException - confirm against consumers.
            // Chain the original exception so its trace is not lost.
            throw new \RuntimeException('Could not make temporary directory: '.$e->getMessage(), 0, $e);
        }

        $this->directories[$path] = TRUE;

        return $path;
    }

    /**
     * Explicitly clean up the managed directory
     *
     * @param string $path a path previously returned by mkTemp() on this instance
     *
     * @throws InvalidArgumentException if the path is not (or is no longer) managed by this instance
     */
    public function cleanup(string $path): void
    {
        if ( ! isset($this->directories[$path])) {
            throw new InvalidArgumentException('Cannot cleanup `'.$path.'` - not a managed directory');
        }

        $this->removeDirectory($path);
    }

    /**
     * Recursively delete the directory and stop tracking it.
     *
     * The exit status of `rm` is not checked: the path is forgotten whether or not the
     * removal succeeded.
     */
    protected function removeDirectory(string $path): void
    {
        // `--` marks the end of options so a path beginning with `-` (e.g. from a relative
        // base directory) can never be interpreted as an option to rm.
        \exec('rm -rf -- '.escapeshellarg($path));
        unset($this->directories[$path]);
    }

    /**
     * Removes all remaining directories
     */
    public function __destruct()
    {
        foreach (array_keys($this->directories) as $directory) {
            $this->removeDirectory($directory);
        }
    }

}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
namespace Ingenerator\PHPUtils\Random;
4+
5+
use InvalidArgumentException;
6+
use function array_filter;
7+
use function array_values;
8+
use function crc32;
9+
use function implode;
10+
use function mt_rand;
11+
use function mt_srand;
12+
use function preg_split;
13+
14+
/**
 * Reorders the words of a string pseudo-randomly, in a way that is repeatable for a given
 * seed value.
 */
class ConsistentStringScrambler
{
    /**
     * Consistently "randomise" the words in a string to be the same for the same random seed value
     *
     * Words are separated on any run of whitespace and re-joined with single spaces.
     *
     * @param string|null $input       the text to shuffle
     * @param string      $random_seed any non-empty string; the same seed always gives the same order
     *
     * @return string|null the shuffled words, or NULL if the input is null / empty / only whitespace
     * @throws InvalidArgumentException if the seed is an empty string
     */
    public function shuffleWords(?string $input, string $random_seed): ?string
    {
        // Compare strictly against '' - empty() would also reject the perfectly valid seed "0"
        if ($random_seed === '') {
            throw new InvalidArgumentException('No seed value provided to '.__METHOD__);
        }

        // Break the string into words. PREG_SPLIT_NO_EMPTY discards only genuinely empty
        // chunks (from leading / trailing whitespace); a bare array_filter() would also
        // discard the word "0" because it is falsy.
        $words = preg_split('/\s+/', $input ?? '', -1, PREG_SPLIT_NO_EMPTY);

        // No content / only whitespace (or, defensively, a failed split)
        if (empty($words)) {
            return NULL;
        }

        // Convert the arbitrary seed input into an integer suitable for seeding the PRNG - doesn't
        // need to be complex, just enough to give the randomness a bit of variety
        $seed = crc32($random_seed);

        return implode(' ', $this->seededShuffle($words, $seed));
    }

    /**
     * Seeded Fisher-Yates shuffle implemented as per https://stackoverflow.com/a/19658344
     *
     * @param array $items values to shuffle (keys are discarded)
     * @param int   $seed  value used to seed the Mersenne Twister generator
     *
     * @return array the same values as a 0-indexed list in shuffled order
     */
    private function seededShuffle(array $items, int $seed): array
    {
        mt_srand($seed);
        // Ensure the array is 0-indexed
        $items = array_values($items);

        try {
            for ($i = count($items) - 1; $i > 0; $i--) {
                // Swap each item with an item from a random position (which may mean some values
                // are swapped more than once).
                $rnd         = mt_rand(0, $i);
                $old_item_i  = $items[$i];
                $items[$i]   = $items[$rnd];
                $items[$rnd] = $old_item_i;
            }

            return $items;
        } finally {
            // Reset the random seed to be random again so that this does not impact on later
            // random numbers from elsewhere in the app.
            mt_srand();
        }
    }
}

src/Sitemap/SitemapParser.php

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
<?php
2+
3+
namespace Ingenerator\PHPUtils\Sitemap;
4+
5+
/**
 * Validates a sitemap XML document against the bundled sitemap.xsd schema and extracts
 * its <url> entries.
 */
class SitemapParser
{

    /**
     * Parse a sitemap document into an array indexed by URL.
     *
     * @param string $sitemap the raw sitemap XML
     *
     * @return array map of `loc` => ['lastmod' => ?string, 'changefreq' => ?string, 'priority' => ?string]
     * @throws \InvalidArgumentException if the XML is empty, fails schema validation, or lists a URL twice
     */
    public static function parse(string $sitemap): array
    {
        $doc = self::validateSitemapXML($sitemap);

        $xml  = \simplexml_import_dom($doc);
        $urls = [];
        foreach ($xml->url as $x_url) {
            $url = (string) $x_url->loc;
            if (isset($urls[$url])) {
                throw new \InvalidArgumentException('Duplicate sitemap entry for '.$url);
            }
            $urls[$url] = [
                'lastmod'    => $x_url->lastmod ? (string) $x_url->lastmod : NULL,
                'changefreq' => $x_url->changefreq ? (string) $x_url->changefreq : NULL,
                'priority'   => $x_url->priority ? (string) $x_url->priority : NULL,
            ];
        }

        return $urls;
    }

    /**
     * Load the XML and validate it against sitemap.xsd, collecting the libxml errors into
     * the exception message if it is not valid.
     *
     * @throws \InvalidArgumentException if the document is empty or not valid
     */
    private static function validateSitemapXML(string $sitemap): \DOMDocument
    {
        // DOMDocument::loadXML() throws a ValueError for an empty source on PHP 8; report it
        // as an InvalidArgumentException like every other kind of invalid sitemap.
        if ($sitemap === '') {
            throw new \InvalidArgumentException("Invalid sitemap XML:\n - [fatal] Document is empty");
        }

        $old_use_errors = \libxml_use_internal_errors(TRUE);
        try {
            // Start from an empty buffer so that errors left over from unrelated libxml
            // operations are not attributed to this document.
            \libxml_clear_errors();

            $doc = new \DOMDocument;
            $doc->loadXML($sitemap);
            $valid = $doc->schemaValidate(__DIR__.'/sitemap.xsd');
            if ( ! $valid) {
                $errors = \array_map(
                    static fn(\LibXMLError $e) => self::formatError($e),
                    \libxml_get_errors()
                );
                throw new \InvalidArgumentException("Invalid sitemap XML:\n".\implode("\n", $errors));
            }
        } finally {
            // Always restore the caller's libxml error handling state
            \libxml_clear_errors();
            \libxml_use_internal_errors($old_use_errors);
        }

        return $doc;
    }

    /**
     * Render a single libxml error as one line of the exception message.
     */
    private static function formatError(\LibXMLError $error): string
    {
        $level = match ($error->level) {
            LIBXML_ERR_WARNING => 'warning',
            LIBXML_ERR_ERROR => 'error',
            LIBXML_ERR_FATAL => 'fatal',
            default => "unknown (".$error->level.")"
        };

        return \sprintf(
            ' - [%s] %s at %s:%s (code %d)',
            $level,
            trim($error->message),
            $error->line,
            $error->column,
            $error->code,
        );
    }

}

src/Sitemap/SitemapRenderer.php

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?php
2+
3+
namespace Ingenerator\PHPUtils\Sitemap;
4+
5+
/**
 * Builds a sitemap XML document from a list of URLs. An instance can be rendered once
 * only; after that it rejects both further URLs and further render() calls.
 */
class SitemapRenderer
{
    /**
     * Pre-rendered <url> elements, in the order they were added.
     *
     * @var string[]
     */
    private array $urls = [];

    private bool $is_rendered = FALSE;

    /**
     * Add a <url> entry to the sitemap.
     *
     * @param string                  $url        rendered (XML-escaped) as <loc>
     * @param \DateTimeImmutable|null $lastmod    rendered as <lastmod> in Y-m-d format if given
     * @param string|null             $changefreq rendered (XML-escaped) as <changefreq> if given
     * @param float|null              $priority   rendered as <priority> if given
     *
     * @throws \LogicException if the sitemap has already been rendered
     */
    public function addUrl(
        string $url,
        ?\DateTimeImmutable $lastmod = NULL,
        ?string $changefreq = NULL,
        ?float $priority = NULL
    ): void {
        if ($this->is_rendered) {
            throw new \LogicException('Cannot modify a sitemap after it has been rendered');
        }

        $xml = "<loc>".self::escapeXml($url)."</loc>";
        if ($lastmod !== NULL) {
            $xml .= '<lastmod>'.$lastmod->format('Y-m-d').'</lastmod>';
        }
        if ($changefreq !== NULL) {
            // Escaped like the URL, so an unexpected value cannot produce malformed XML
            $xml .= '<changefreq>'.self::escapeXml($changefreq).'</changefreq>';
        }
        if ($priority !== NULL) {
            $xml .= '<priority>'.$priority.'</priority>';
        }

        $this->urls[] = "<url>$xml</url>";
    }

    /**
     * Render the complete sitemap document.
     *
     * @return string the sitemap XML
     * @throws \LogicException     if the sitemap has already been rendered
     * @throws \UnderflowException if no URLs have been added
     */
    public function render(): string
    {
        if ($this->is_rendered) {
            throw new \LogicException('Cannot render a sitemap more than once');
        }

        if ($this->urls === []) {
            throw new \UnderflowException('Cannot render a sitemap containing no <url> entries');
        }

        $xml = '<?xml version="1.0" encoding="UTF-8"?>'."\n"
            .'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
            .\implode('', $this->urls)
            .'</urlset>';

        // Mark rendered to prevent reuse and clear the urls collection to release the memory
        $this->is_rendered = TRUE;
        $this->urls        = [];

        return $xml;
    }

    /**
     * Escape a value for use as XML element text.
     */
    private static function escapeXml(string $value): string
    {
        return \htmlspecialchars($value, ENT_XML1 | ENT_QUOTES, 'UTF-8');
    }
}

0 commit comments

Comments
 (0)