Skip to content

Commit 0b34a47

Browse files
committed
Refactored DHS data import, added DistinctByXXX() methods.
The DHS import now retrieves national and background data; added the kTAG_BREAKDOWN descriptor. Added the Distinct(), DistinctByQuery() and DistinctByExample() methods.
1 parent 957b81f commit 0b34a47

14 files changed

+928
-730
lines changed

.idea/workspace.xml

+202-203
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

batch/DHS.php

+144-417
Large diffs are not rendered by default.

batch/GetDistinct.php

-87
This file was deleted.

batch/InitDHS.php

+1-8
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,7 @@
137137
// Initialise DHS surveys.
138138
//
139139
echo( "- Initialising DHS surveys: " );
140-
$dhs->InitSurveys();
141-
echo( " Done.\n" );
142-
143-
//
144-
// Initialise DHS data.
145-
//
146-
echo( "- Initialising DHS data: " );
147-
$retries = $dhs->InitData();
140+
$retries = $dhs->InitSurveys();
148141
if( $retries )
149142
echo( " Done (retries: $retries).\n" );
150143
else
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Count the documents of the "_data" collection for every distinct pair of
// ":breakdown" and "@4" values, then list the groups ordered by group key.
// Created with 3T MongoChef, the GUI for MongoDB - http://3t.io/mongochef
db._data.aggregate(
    [
        // Stage 1: group on the compound key and tally each group's members.
        {
            "$group": {
                "_id": { "breakdown": "$:breakdown", "category": "$@4" },
                "count": { "$sum": 1 }
            }
        },
        // Stage 2: ascending order on the group key.
        { "$sort": { "_id": 1 } }
    ],
    // Options: cursor batches of 50 results, disk use allowed.
    { "cursor": { "batchSize": 50 }, "allowDiskUse": true }
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Count the documents of the "_data" collection for every distinct "@4"
// value, then list the groups ordered by group key.
// Created with 3T MongoChef, the GUI for MongoDB - http://3t.io/mongochef
db._data.aggregate(
    [
        // Stage 1: group on the "@4" field and tally each group's members.
        {
            "$group": {
                "_id": "$@4",
                "count": { "$sum": 1 }
            }
        },
        // Stage 2: ascending order on the group key.
        { "$sort": { "_id": 1 } }
    ],
    // Options: cursor batches of 50 results, disk use allowed.
    { "cursor": { "batchSize": 50 }, "allowDiskUse": true }
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Count the documents of the "_data" collection for every distinct
// ":breakdown" value, then list the groups ordered by group key.
// Created with 3T MongoChef, the GUI for MongoDB - http://3t.io/mongochef
db._data.aggregate(
    [
        // Stage 1: group on the ":breakdown" field and tally each group's members.
        {
            "$group": {
                "_id": "$:breakdown",
                "count": { "$sum": 1 }
            }
        },
        // Stage 2: ascending order on the group key.
        { "$sort": { "_id": 1 } }
    ],
    // Options: cursor batches of 50 results, disk use allowed.
    { "cursor": { "batchSize": 50 }, "allowDiskUse": true }
);

src/PHPLib/ArangoDB/Collection.php

+163
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,169 @@ public function MapReduce( $thePipeline, array $theOptions = [] )
11531153

11541154

11551155

1156+
/*=======================================================================================
1157+
* *
1158+
* PUBLIC DISTINCT INTERFACE *
1159+
* *
1160+
*======================================================================================*/
1161+
1162+
1163+
1164+
/*===================================================================================
1165+
* Distinct *
1166+
*==================================================================================*/
1167+
1168+
/**
 * <h4>Return the distinct values of a property.</h4>
 *
 * The whole collection is scanned with an AQL query: when counts are not
 * requested the query uses <tt>RETURN DISTINCT</tt>, when they are requested it
 * uses <tt>COLLECT ... WITH COUNT INTO</tt>. The collection name and the property
 * offset are both handed to the server as bind variables.
 *
 * @param string				$theOffset			The property offset.
 * @param boolean				$doCount			Return element counts.
 * @return array				Without counts, the cursor's full result set; with
 *								counts, an array indexed by distinct value holding
 *								the number of occurrences of each value.
 *
 * @uses triagens\ArangoDb\Statement::execute()
 */
public function Distinct( $theOffset, $doCount = FALSE )
{
	//
	// Select query text.
	//
	if( $doCount )
		$aql = 'FOR r IN @@c COLLECT k = r.@f WITH COUNT INTO n RETURN{ k, n }';
	else
		$aql = 'FOR r IN @@c RETURN DISTINCT r.@f';

	//
	// Assemble request.
	//
	$variables = [ '@c' => $this->collectionName(), 'f' => $theOffset ];
	$request = [ 'query' => $aql, 'bindVars' => $variables ];

	//
	// Run query.
	//
	$query = new ArangoStatement( $this->mDatabase->Connection(), $request );
	$cursor = $query->execute();

	//
	// Collect value => count pairs.
	//
	if( $doCount )
	{
		$counts = [];
		foreach( $cursor as $row )
		{
			$record = $row->getAll();
			$counts[ $record[ 'k' ] ] = $record[ 'n' ];
		}

		return $counts;														// ==>
	}

	//
	// Plain list of values.
	//
	return $cursor->getAll();												// ==>

} // Distinct.
1217+
1218+
1219+
/*===================================================================================
1220+
* DistinctByQuery *
1221+
*==================================================================================*/
1222+
1223+
/**
 * <h4>Return the distinct values of a property by query.</h4>
 *
 * This method is a plain delegate: all three arguments are forwarded, unchanged
 * and in the same order, to {@link DistinctByExample()} and its result is
 * returned as is.
 *
 * @param string				$theOffset			The property offset.
 * @param mixed					$theFilter			The selection criteria, forwarded
 *													to {@link DistinctByExample()}.
 * @param boolean				$doCount			Return element counts.
 * @return array				The result set produced by {@link DistinctByExample()}.
 *
 * @uses DistinctByExample()
 */
public function DistinctByQuery( $theOffset, $theFilter, $doCount = FALSE )
{
	//
	// Delegate to the by-example implementation.
	//
	$result = $this->DistinctByExample( $theOffset, $theFilter, $doCount );

	return $result;															// ==>

} // DistinctByQuery.
1240+
1241+
1242+
/*===================================================================================
1243+
* DistinctByExample *
1244+
*==================================================================================*/
1245+
1246+
/**
 * <h4>Return the distinct values of a property by example.</h4>
 *
 * We implement this method by building an AQL query that keeps only the documents
 * whose properties equal the corresponding values of the example document, and then
 * either applies <tt>RETURN DISTINCT</tt> (values only) or
 * <tt>COLLECT ... WITH COUNT INTO</tt> (values with counts) on the requested property.
 *
 * The collection name, the property offset, and both the names and the values of the
 * example properties are all passed as bind variables: no caller-provided text is
 * ever spliced into the query string.
 *
 * @param string				$theOffset			The property offset.
 * @param array					$theDocument		Example document as an array; an
 *													empty array selects all documents.
 * @param boolean				$doCount			Return element counts.
 * @return array				Without counts, the cursor's full result set; with
 *								counts, an array indexed by distinct value holding
 *								the number of occurrences of each value.
 *
 * @uses triagens\ArangoDb\Statement::execute()
 */
public function DistinctByExample( $theOffset, array $theDocument, $doCount = FALSE )
{
	//
	// Init bind variables.
	//
	$bind = [ '@c' => $this->collectionName(), 'f' => $theOffset ];

	//
	// Build filter conditions.
	//
	// Property names are bound (@kN) like the offset (@f) instead of being
	// concatenated between backticks: a name containing a backtick would
	// otherwise terminate the quoting and corrupt, or inject into, the query.
	// The key is cast to string because PHP turns numeric array keys into
	// integers.
	//
	$i = 0;
	$conditions = [];
	foreach( $theDocument as $key => $value )
	{
		$i++;
		$conditions[] = "r.@k$i == @v$i";
		$bind[ "k$i" ] = (string) $key;
		$bind[ "v$i" ] = $value;
	}

	//
	// Build statement.
	//
	$query = 'FOR r IN @@c';
	if( count( $conditions ) )
		$query .= ' FILTER ' . implode( ' AND ', $conditions );
	$query .= ( $doCount )
			? ' COLLECT k = r.@f WITH COUNT INTO n RETURN{ k, n }'
			: ' RETURN DISTINCT r.@f';

	//
	// Execute statement.
	//
	$statement = new ArangoStatement(
		$this->mDatabase->Connection(),
		[ 'query' => $query, 'bindVars' => $bind ] );
	$result = $statement->execute();

	//
	// Handle only values.
	//
	if( ! $doCount )
		return $result->getAll();											// ==>

	//
	// Build results array.
	//
	// NOTE(review): the distinct value is used as a PHP array key, so values
	// that are not integers or strings are coerced by PHP (or rejected, for
	// arrays) - confirm the queried properties only hold scalar values.
	//
	$list = [];
	foreach( $result as $item )
	{
		$data = $item->getAll();
		$list[ $data[ 'k' ] ] = $data[ 'n' ];
	}

	return $list;															// ==>

} // DistinctByExample.
1316+
1317+
1318+
11561319
/*=======================================================================================
11571320
* *
11581321
* PUBLIC COUNTING INTERFACE *

0 commit comments

Comments
 (0)