Skip to content

Commit

Permalink
Normalizing and scaling ranks
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBelicza committed Sep 19, 2020
1 parent c1bf117 commit c507d3b
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 31 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ long-running calculation can be scheduled in batches using the Strategy OOP patt
iteration number.
* However, the iteration stops as soon as the ranks are accurate enough, even if the maximum number of iterations has not been reached.
* The accuracy is measured by the float epsilon constant.
* At the end, the algorithm normalizes the ranks between 0 and 1 and then scales them between 1 and 10. The scaling range
is configurable.
* Getting, setting, updating the nodes from the resource is a responsibility of the NodeDataSourceStrategyInterface.
* The package provides a simple implementation of the NodeDataSourceStrategyInterface that only keeps the nodes in the
memory. Another way of implementing the NodeDataSourceStrategyInterface could be a simple class that uses an ORM to
Expand Down Expand Up @@ -62,9 +64,12 @@ $ranking = new Ranking(
$strategy
);

$normalizer = new Normalizer();

$pageRankAlgorithm = new PageRankAlgorithm(
$ranking,
$strategy
$strategy,
$normalizer
);

$maxIteration = 100;
Expand Down
40 changes: 40 additions & 0 deletions src/Service/Normalizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

declare(strict_types=1);

namespace PhpScience\PageRank\Service;

use PhpScience\PageRank\Data\NodeCollectionInterface;

/**
 * Rescales node ranks linearly onto a configurable target scale.
 *
 * Every rank is min-max normalized against the lowest and highest rank passed
 * to normalize() and then mapped onto the [$min, $max] scale given to the
 * constructor (1 to 10 by default).
 */
class Normalizer implements NormalizerInterface
{
    private float $min;
    private float $max;

    /**
     * @param float $min Lower bound of the target scale.
     * @param float $max Upper bound of the target scale.
     */
    public function __construct(
        float $min = 1,
        float $max = 10
    ) {
        $this->min = $min;
        $this->max = $max;
    }

    /**
     * Replaces the rank of every node in the collection with its scaled value.
     *
     * @param NodeCollectionInterface $nodeCollection Nodes whose ranks are rewritten in place.
     * @param float                   $lowestRank     Rank that is mapped to the lower scale bound.
     * @param float                   $highestRank    Rank that is mapped to the upper scale bound.
     */
    public function normalize(
        NodeCollectionInterface $nodeCollection,
        float $lowestRank,
        float $highestRank
    ): void {
        foreach ($nodeCollection->getNodes() as $node) {
            $rank = $this->getRank($node->getRank(), $lowestRank, $highestRank);
            $node->setRank($rank);
        }
    }

    /**
     * Maps a single rank from the [$min, $max] input span onto the target scale.
     *
     * @param float $value Rank to convert.
     * @param float $min   Lowest rank of the input span.
     * @param float $max   Highest rank of the input span.
     *
     * @return float The scaled rank.
     */
    private function getRank(float $value, float $min, float $max): float
    {
        $span = $max - $min;

        // When every rank is identical (or there is only one node) the input
        // span is zero. Dividing by it would produce NAN on PHP 7 and throw a
        // DivisionByZeroError on PHP 8, so such ranks are pinned to the lower
        // bound of the target scale instead.
        if (0.0 === $span) {
            return $this->min;
        }

        $normalized = ($value - $min) / $span;

        return ($normalized * ($this->max - $this->min)) + $this->min;
    }
}
23 changes: 23 additions & 0 deletions src/Service/NormalizerInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

declare(strict_types=1);

namespace PhpScience\PageRank\Service;

use PhpScience\PageRank\Data\NodeCollectionInterface;

interface NormalizerInterface
{
/**
* Normalizes and scales the rank of every node in the given collection.
*
* Nothing is returned: implementations are expected to write the adjusted
* ranks back to the nodes of the collection that was passed in.
*
* @param NodeCollectionInterface $nodeCollection Collection whose node ranks
*                                                are adjusted.
* @param float                   $lowestRank     Lowest rank in the
*                                                collection before adjusting.
* @param float                   $highestRank    Highest rank in the
*                                                collection before adjusting.
*/
public function normalize(
NodeCollectionInterface $nodeCollection,
float $lowestRank,
float $highestRank
): void;
}
23 changes: 21 additions & 2 deletions src/Service/PageRankAlgorithm.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,24 @@ class PageRankAlgorithm implements PageRankAlgorithmInterface
{
private NodeDataSourceStrategyInterface $nodeDataStrategy;
private RankingInterface $ranking;
private NormalizerInterface $normalizer;

public function __construct(
RankingInterface $ranking,
NodeDataSourceStrategyInterface $nodeDataStrategy
NodeDataSourceStrategyInterface $nodeDataStrategy,
NormalizerInterface $normalizer
) {
$this->nodeDataStrategy = $nodeDataStrategy;
$this->ranking = $ranking;
$this->normalizer = $normalizer;
}

public function run(int $maxIterate): NodeCollectionInterface
{
$this->initiateRanking();
$this->runBatch($maxIterate);

return $this->runBatch($maxIterate);
return $this->normalize();
}

public function initiateRanking(): NodeCollectionInterface
Expand All @@ -47,6 +51,21 @@ public function runBatch(int $maxIterate): NodeCollectionInterface
return $nodeCollection;
}

/**
 * Rescales the ranks of the data strategy's node collection.
 *
 * The collection and its lowest and highest rank are read from the node data
 * strategy and handed to the injected normalizer, which adjusts the ranks.
 *
 * @return NodeCollectionInterface The collection that was passed to the normalizer.
 */
public function normalize(): NodeCollectionInterface
{
    $strategy = $this->nodeDataStrategy;
    $nodes = $strategy->getNodeCollection();

    // Arguments are evaluated left to right: lowest rank first, then highest.
    $this->normalizer->normalize(
        $nodes,
        $strategy->getLowestRank(),
        $strategy->getHighestRank()
    );

    return $nodes;
}

private function powerIterate(
NodeCollectionInterface $nodeCollection,
int $maxIterate
Expand Down
9 changes: 9 additions & 0 deletions src/Service/PageRankAlgorithmInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,13 @@ public function initiateRanking(): NodeCollectionInterface;
* @return NodeCollectionInterface
*/
public function runBatch(int $maxIterate): NodeCollectionInterface;

/**
* After the PageRank calculation, the ranks can span a wide range of
* values. This method rescales the ranks so that they lie between a
* minimum and a maximum value.
*
* @return NodeCollectionInterface The node collection holding the adjusted
*                                 ranks.
*/
public function normalize(): NodeCollectionInterface;
}
45 changes: 41 additions & 4 deletions src/Strategy/MemorySourceStrategy.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class MemorySourceStrategy implements NodeDataSourceStrategyInterface

private array $previousRanks = [];
private array $nodeListMap;
private ?NodeCollectionInterface $nodeCollection = null;

public function __construct(
NodeBuilder $nodeBuilder,
Expand Down Expand Up @@ -50,12 +51,48 @@ public function updateNodes(NodeCollectionInterface $collection): void

public function getNodeCollection(): NodeCollectionInterface
{
$nodes = [];
if (null === $this->nodeCollection) {
$nodes = [];

foreach ($this->nodeListMap as $nodeMap) {
$nodes[] = $this->nodeBuilder->build($nodeMap);
foreach ($this->nodeListMap as $nodeMap) {
$nodes[] = $this->nodeBuilder->build($nodeMap);
}

$this->nodeCollection = $this->nodeCollectionBuilder->build($nodes);
}

return $this->nodeCollection;
}

/**
 * Returns the highest rank found in the node collection.
 *
 * An empty collection has no highest rank. 0.0 is returned in that case so
 * that the declared float return type is honoured instead of a TypeError
 * being raised for a null return value.
 *
 * @return float
 */
public function getHighestRank(): float
{
    $highest = null;

    foreach ($this->getNodeCollection()->getNodes() as $node) {
        // Read the rank once per node and reuse it for compare and assign.
        $rank = $node->getRank();

        if (null === $highest || $rank > $highest) {
            $highest = $rank;
        }
    }

    return $highest ?? 0.0;
}

public function getLowestRank(): float
{
$lowest = null;

foreach ($this->getNodeCollection()->getNodes() as $node) {
if (
null === $lowest
|| $node->getRank() < $lowest
) {
$lowest = $node->getRank();
}
}

return $this->nodeCollectionBuilder->build($nodes);
return $lowest;
}
}
14 changes: 14 additions & 0 deletions src/Strategy/NodeDataSourceStrategyInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,18 @@ public function updateNodes(NodeCollectionInterface $collection): void;
* @return NodeCollectionInterface
*/
public function getNodeCollection(): NodeCollectionInterface;

/**
* It returns the highest rank from the node collection.
*
* @return float
*/
public function getHighestRank(): float;

/**
* It returns the lowest rank from the node collection.
*
* @return float
*/
public function getLowestRank(): float;
}
83 changes: 59 additions & 24 deletions tests/functional/Service/PageRankAlgorithmTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,50 @@

class PageRankAlgorithmTest extends TestCase
{
private PageRankAlgorithm $pageRankAlgorithm;

protected function setUp(): void
/**
* @dataProvider dataProviderExpectedNormalizedRanks
*
* @param float[] $expectedRanks
*/
public function testRun(array $expectedRanks): void
{
$dataSource = $this->getDataSource();
$pageRankAlgorithm = $this->createPageRankAlgorithm();
$nodeCollection = $pageRankAlgorithm->run(2);

$nodeBuilder = new NodeBuilder();
$nodeCollectionBuilder = new NodeCollectionBuilder();
$strategy = new MemorySourceStrategy(
$nodeBuilder,
$nodeCollectionBuilder,
$dataSource
);
static::assertSame(4, $nodeCollection->getAllNodeCount());

$rankComparator = new RankComparator();
$ranking = new Ranking(
$rankComparator,
$strategy
);
foreach ($nodeCollection->getNodes() as $node) {
$expectedRank = $expectedRanks[$node->getId()];
$actualRank = $node->getRank();

$this->pageRankAlgorithm = new PageRankAlgorithm(
$ranking,
$strategy
);
static::assertSame($expectedRank, $actualRank);
}
}

/**
 * Data provider for testRun().
 *
 * Each data set supplies the expected rank per node id after a full run,
 * i.e. after the ranks have been normalized and scaled.
 *
 * @return array<string, array<string, array<int, float>>>
 */
public function dataProviderExpectedNormalizedRanks(): array
{
    // Expected ranks keyed by node id.
    $expectedRanks = [
        1 => 1.0,
        2 => 2.4999999999999996,
        3 => 10.0,
        4 => 8.5,
    ];

    return [
        'scenario_1' => ['expectedRanks' => $expectedRanks],
    ];
}

/**
* @dataProvider dataProviderExpectedRanks
*
* @param float[] $expectedRanks
*/
public function testRun(array $expectedRanks): void
public function testRunBatch(array $expectedRanks): void
{
$nodeCollection = $this
->pageRankAlgorithm
->run(2);
$pageRankAlgorithm = $this->createPageRankAlgorithm();
$pageRankAlgorithm->initiateRanking();
$nodeCollection = $pageRankAlgorithm->runBatch(2);

static::assertSame(4, $nodeCollection->getAllNodeCount());

Expand Down Expand Up @@ -99,4 +107,31 @@ private function getDataSource(): array
]
];
}

/**
 * Builds a PageRankAlgorithm wired with the in-memory strategy, the test
 * data source and a Normalizer that uses its default scale.
 *
 * A fresh instance is created on every call, so tests do not share state.
 */
private function createPageRankAlgorithm(): PageRankAlgorithmInterface
{
    $strategy = new MemorySourceStrategy(
        new NodeBuilder(),
        new NodeCollectionBuilder(),
        $this->getDataSource()
    );

    // The ranking and the algorithm must share the same strategy instance.
    $ranking = new Ranking(new RankComparator(), $strategy);

    return new PageRankAlgorithm($ranking, $strategy, new Normalizer());
}
}

0 comments on commit c507d3b

Please sign in to comment.