-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPowerStationDatasetSplitter.php
46 lines (39 loc) · 1.28 KB
/
PowerStationDatasetSplitter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
<?php
namespace App\Application\ML;
use App\Application\Path\AppPathResolver;
use App\Domain\FileNames;
use League\Csv\CannotInsertRecord;
use League\Csv\Exception;
use League\Csv\UnavailableStream;
use League\Csv\Writer;
use Rubix\ML\Datasets\Unlabeled;
use Rubix\ML\Extractors\CSV;
use Rubix\ML\Transformers\NumericStringConverter;
/**
 * Splits the validated power-station dataset into a training file and a test file.
 *
 * Input and output locations are resolved through the injected AppPathResolver
 * using the FileNames constants.
 */
readonly class PowerStationDatasetSplitter
{
    public function __construct(
        private AppPathResolver $appPathResolver,
    ) {
    }

    /**
     * Loads FileNames::VALID_DATA as a CSV with a header row, applies
     * NumericStringConverter, randomizes the dataset and splits it at a
     * 0.8 ratio. The first part is written to FileNames::TRAINING_SET and
     * the second part to FileNames::TEST_SET, each opened in 'w' mode.
     *
     * @throws UnavailableStream
     * @throws CannotInsertRecord
     * @throws Exception
     */
    public function split(): void
    {
        $source = new CSV(
            $this->appPathResolver->getResourcesPath(FileNames::VALID_DATA),
            header: true,
        );

        $samples = Unlabeled::fromIterator($source)->apply(new NumericStringConverter());

        [$trainingSet, $testingSet] = $samples->randomize()->split(0.8);

        // The training file is fully written before the test file is opened.
        $this->writeRows(
            Writer::createFromPath($this->appPathResolver->getResourcesPath(FileNames::TRAINING_SET), 'w'),
            $trainingSet,
        );
        $this->writeRows(
            Writer::createFromPath($this->appPathResolver->getResourcesPath(FileNames::TEST_SET), 'w'),
            $testingSet,
        );
    }

    /**
     * Inserts every row of $rows into $csv, one record at a time.
     *
     * @throws CannotInsertRecord
     * @throws Exception
     */
    private function writeRows(Writer $csv, iterable $rows): void
    {
        foreach ($rows as $row) {
            $csv->insertOne($row);
        }
    }
}