Skip to content

Commit fa56ce4

Browse files
committed
ConsistentStringScrambler to consistently scramble words in a string
1 parent 69145bc commit fa56ce4

File tree

3 files changed

+197
-0
lines changed

3 files changed

+197
-0
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
### Unreleased
22

3+
* Add a ConsistentStringScrambler - Consistently "randomise" the words in a string to be the same for the same random seed value using a Seeded Fisher-Yates shuffle
4+
35
* Add classes to simplify the creation and parsing of XML sitemaps
46

57
* Add method to factory a DateTimeImmutable from a strict format and throw if it doesn't comply
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
namespace Ingenerator\PHPUtils\Random;
4+
5+
use InvalidArgumentException;
6+
use function array_filter;
7+
use function array_values;
8+
use function crc32;
9+
use function implode;
10+
use function mt_rand;
11+
use function mt_srand;
12+
use function preg_split;
13+
14+
class ConsistentStringScrambler
{
    /**
     * Consistently "randomise" the order of the words in a string.
     *
     * The input is split on runs of whitespace, the resulting words are shuffled with a PRNG
     * seeded from `$random_seed`, and the words are re-joined with single spaces. The same
     * input and seed therefore always give the same output.
     *
     * @param string|null $input       the text to scramble; NULL is treated like an empty string
     * @param string      $random_seed any non-empty string, hashed to an integer to seed the PRNG
     *
     * @return string|null the shuffled words joined by single spaces, or NULL if the input
     *                     contains no words (NULL, empty, or whitespace only)
     *
     * @throws InvalidArgumentException if `$random_seed` is the empty string
     */
    public function shuffleWords(?string $input, string $random_seed): ?string
    {
        // Compare strictly against '' rather than using empty(): empty('0') is TRUE in PHP,
        // which would wrongly reject the perfectly valid seed "0".
        if ($random_seed === '') {
            throw new InvalidArgumentException('No seed value provided to '.__METHOD__);
        }

        // Break the string into words. PREG_SPLIT_NO_EMPTY discards only the zero-length
        // pieces produced by leading / trailing whitespace - unlike a bare array_filter(),
        // which would also silently drop any word that is literally "0".
        $words = preg_split('/\s+/', $input ?? '', -1, PREG_SPLIT_NO_EMPTY);

        // No words at all (or, defensively, a FALSE return from preg_split)
        if ($words === FALSE || $words === []) {
            return NULL;
        }

        // Convert the arbitrary seed input into an integer suitable for seeding the PRNG - doesn't
        // need to be complex, just enough to give the randomness a bit of variety
        $seed = crc32($random_seed);

        return implode(' ', $this->seededShuffle($words, $seed));
    }

    /**
     * Seeded Fisher-Yates shuffle implemented as per https://stackoverflow.com/a/19658344
     *
     * Seeds the global Mersenne Twister generator with `$seed`, shuffles, and then re-seeds the
     * generator randomly before returning (even if something throws part-way through).
     *
     * @param array $items the values to shuffle; keys are discarded
     * @param int   $seed  value passed to mt_srand() to make the shuffle repeatable
     *
     * @return array a 0-indexed array containing the same values in shuffled order
     */
    private function seededShuffle(array $items, int $seed): array
    {
        mt_srand($seed);
        // Ensure the array is 0-indexed
        $items = array_values($items);

        try {
            for ($i = count($items) - 1; $i > 0; $i--) {
                // Swap each item with an item from a random position (which may mean some values
                // are swapped more than once).
                $rnd         = mt_rand(0, $i);
                $old_item_i  = $items[$i];
                $items[$i]   = $items[$rnd];
                $items[$rnd] = $old_item_i;
            }

            return $items;
        } finally {
            // Reset the random seed to be random again so that this does not impact on later
            // random numbers from elsewhere in the app.
            mt_srand();
        }
    }
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
<?php
2+
3+
namespace test\unit\Ingenerator\PHPUtils\Random;
4+
5+
use Ingenerator\PHPUtils\Random\ConsistentStringScrambler;
6+
use PHPUnit\Framework\TestCase;
7+
use function mt_rand;
8+
9+
10+
class ConsistentStringScramblerTest extends TestCase
{

    /**
     * The class can be constructed with no arguments.
     */
    public function test_it_is_initialisable(): void
    {
        $subject = $this->newSubject();

        self::assertInstanceOf(ConsistentStringScrambler::class, $subject);
    }

    /**
     * A NULL input has no words, so the result is NULL.
     */
    public function test_it_returns_null_for_null_string(): void
    {
        $actual = $this->newSubject()->shuffleWords(NULL, 'anything');

        self::assertNull($actual);
    }

    /**
     * An empty input has no words, so the result is NULL.
     */
    public function test_it_returns_null_for_empty_string(): void
    {
        $actual = $this->newSubject()->shuffleWords('', 'anything');

        self::assertNull($actual);
    }

    /**
     * A single word cannot be reordered; surrounding whitespace is removed.
     *
     * @testWith ["Foo"]
     *           [" Foo"]
     *           ["Foo "]
     *           ["\nFoo\n"]
     */
    public function test_it_returns_same_output_for_single_word_string_even_with_extra_whitespace(
        $input
    ): void {
        $actual = $this->newSubject()->shuffleWords($input, 'any');

        self::assertSame('Foo', $actual);
    }

    /**
     * The same text and seed give the same, known, word order on every call.
     */
    public function test_it_always_produces_same_shuffled_words_for_same_hash_input(): void
    {
        $text = 'I am the eggman, I am the walrus';

        $first_run  = $this->newSubject()->shuffleWords($text, 'my-hash');
        $second_run = $this->newSubject()->shuffleWords($text, 'my-hash');
        self::assertSame($first_run, $second_run, 'Should give same result on every run');

        $third_run = $this->newSubject()->shuffleWords($text, 'my-hash');
        self::assertSame(
            'the am eggman, the I I am walrus',
            $third_run,
            'Should be expected value across all platforms'
        );
    }

    /**
     * Changing the seed changes the word order for the same text.
     */
    public function test_it_produces_different_shuffled_words_for_different_hash_input(): void
    {
        $text = 'I am the eggman, I am the walrus';

        $with_first_seed = $this->newSubject()->shuffleWords($text, 'my-hash');
        $with_other_seed = $this->newSubject()->shuffleWords($text, 'other-hash');

        self::assertNotEquals(
            $with_first_seed,
            $with_other_seed,
            'Different hash key should produce different sequence'
        );
    }

    /**
     * Tabs, newlines and leading / trailing whitespace in the input do not appear in the
     * output: words are joined by single spaces.
     *
     * @testWith ["I, robot am\t alive", "am I, robot alive"]
     *           ["Thus!\nspake Zarathustra ", "spake Zarathustra Thus!"]
     *           [" 51 Niddry St\n", "Niddry St 51"]
     */
    public function test_it_strips_repeated_and_enclosing_whitespace_in_incoming_string($input, $expect): void
    {
        $actual = $this->newSubject()->shuffleWords($input, 'any-hash');

        self::assertSame($expect, $actual);
    }

    /**
     * The subject seeds the global PRNG while shuffling; it must leave it randomly seeded
     * again afterwards so later random numbers elsewhere are not predictable.
     */
    public function test_it_does_not_cause_subsequent_random_numbers_to_be_predictable(): void
    {
        // There is a very tiny chance that a randomly re-seeded generator would still produce
        // the next number of the fixed-seed sequence, giving a false failure. Comparing three
        // consecutive numbers makes that virtually impossible.
        $shuffled = $this->newSubject()->shuffleWords('whatever I said', 'some-seed');

        // Sanity check: if this fails then the internal seed / random sequence has changed,
        // and the three numbers below must be updated to whatever the PRNG would give next
        // if the seed had NOT been reset.
        self::assertSame(
            'I whatever said',
            $shuffled,
            'Should be using expected seed internally'
        );

        // The values that would follow if the generator were still on the fixed seed
        $predictable_sequence = [
            1753721728,
            1187560983,
            1193164547,
        ];

        // Draw the next three numbers, in order, from the global generator
        $next_numbers = [];
        for ($draw = 0; $draw < 3; $draw++) {
            $next_numbers[] = mt_rand(0, 2140000000);
        }

        self::assertNotSame(
            $predictable_sequence,
            $next_numbers,
            'Later random numbers are random again'
        );
    }

    /**
     * An empty seed string is rejected with an InvalidArgumentException.
     */
    public function test_it_throws_with_empty_hash_input(): void
    {
        $subject = $this->newSubject();

        // Register the expectation only after the subject has been built
        $this->expectException(\InvalidArgumentException::class);

        $subject->shuffleWords('anything', '');
    }

    /**
     * Builds a fresh instance of the class under test.
     */
    private function newSubject(): ConsistentStringScrambler
    {
        return new ConsistentStringScrambler();
    }

}

0 commit comments

Comments
 (0)