Skip to content

Commit f4460e5

Browse files
committed
Working on loading DHS metadata.
Stuck on loading enumerations: the descriptor must be saved before one can attach a controlled vocabulary to it, but it doesn't get updated...
1 parent 7f07b3a commit f4460e5

9 files changed

+733
-1068
lines changed

.idea/workspace.xml

+193-164
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

batch/DHS.php

+392
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
<?php
2+
3+
/**
4+
* DHS.php
5+
*
6+
* This file contains the definition of the {@link DHS} class.
7+
*/
8+
9+
/*
10+
* Global includes.
11+
*/
12+
require_once(dirname(__DIR__) . "/includes.local.php");
13+
14+
/*
15+
* Local includes.
16+
*/
17+
require_once(dirname(__DIR__) . "/defines.inc.php");
18+
19+
/*
20+
* Driver includes.
21+
*/
22+
if( kENGINE == "MONGO" )
23+
require_once(dirname(__DIR__) . "/mongo.local.php");
24+
elseif( kENGINE == "ARANGO" )
25+
require_once(dirname(__DIR__) . "/arango.local.php");
26+
27+
/*=======================================================================================
28+
* *
29+
* DHS.php *
30+
* *
31+
*======================================================================================*/
32+
33+
/**
34+
* <h4>DHS object.</h4>
35+
*
36+
* This class implements a DHS data and metadata repository, it can be used to initialise
37+
* the data dictionary with DHS data provided by the
38+
* {@link http://api.dhsprogram.com/#/index.html} web services.
39+
*
40+
* The class features methods to initialise the metadata and import data.
41+
*
42+
* @package Data
43+
*
44+
* @author Milko A. Škofič <[email protected]>
45+
* @version 1.00
46+
* @since 13/05/2016
47+
*/
48+
class DHS
{
    /**
     * <h4>DHS namespace key.</h4>
     *
     * Local identifier of the namespace term under which all DHS metadata is grouped.
     *
     * @var string
     */
    const kDHS_NAMESPACE = 'DHS';

    /**
     * <h4>DHS descriptors URL.</h4>
     *
     * URL of the DHS web service that lists the indicator fields, in JSON format.
     *
     * @var string
     */
    const kDHS_URL_INDICATORS =
        'http://api.dhsprogram.com/rest/dhs/indicators/fields?f=json';

    /**
     * <h4>Wrapper object.</h4>
     *
     * This data member holds the <i>database object</i> that contains data and metadata.
     *
     * @var \Milko\PHPLib\MongoDB\Wrapper|\Milko\PHPLib\ArangoDB\Wrapper
     */
    protected $mDatabase = NULL;

    /**
     * <h4>Namespace object.</h4>
     *
     * This data member holds the <i>DHS namespace term</i>; it is set by
     * {@link InitBaseDescriptors()} and is <tt>NULL</tt> until that method has run.
     *
     * @var \Milko\PHPLib\Term
     */
    protected $mNamespace = NULL;

    /**
     * <h4>Descriptors match table.</h4>
     *
     * This data member holds the <i>descriptors match table</i>, it is an array whose
     * keys represent the DHS variable names and the values the descriptor offsets.
     *
     * @var array
     */
    protected $mMatchTable = [];



/*=======================================================================================
 *                                                                                      *
 *                                          MAGIC                                       *
 *                                                                                      *
 *======================================================================================*/



    /*===================================================================================
     *  __construct                                                                     *
     *==================================================================================*/

    /**
     * <h4>Instantiate class.</h4>
     *
     * Instances of this class are instantiated by using global definitions:
     *
     * <ul>
     *  <li><tt>{@link kENGINE}</tt>: The database engine:
     *   <ul>
     *      <li><tt>MONGO</tt>: Use MongoDB.
     *      <li><tt>ARANGO</tt>: Use ArangoDB.
     *   </ul>
     *  <li><tt>{@link kDSN}</tt>: The Data Source Name of the database server.
     *  <li><tt>{@link kDB}</tt>: The database name.
     *  <li><tt>{@link kSESSION_CACHE_ID}</tt>: The memcached persistent identifier.
     *  <li><tt>{@link kSESSION_CACHE_HOST}</tt>: The memcached default host.
     *  <li><tt>{@link kSESSION_CACHE_PORT}</tt>: The memcached default port.
     * </ul>
     *
     * The provided parameter is a boolean switch that, if <tt>true</tt>, will drop and
     * initialise the database, so be careful when overriding the default value.
     *
     * @param bool $doInitDatabase Initialise database.
     * @throws RuntimeException If {@link kENGINE} is not a supported engine.
     */
    public function __construct( $doInitDatabase = FALSE )
    {
        //
        // Instantiate server.
        //
        switch( kENGINE )
        {
            case "MONGO":
                $server = new \Milko\PHPLib\MongoDB\Server( kDSN );
                break;

            case "ARANGO":
                $server = new \Milko\PHPLib\ArangoDB\Server( kDSN );
                break;

            default:
                throw new RuntimeException(
                    "Invalid database engine [" . kENGINE . "]." );             // !@! ==>

        } // Parsing engine.

        //
        // Drop database.
        // This must happen before the wrapper is created, so that the wrapper
        // is instantiated against the fresh database.
        //
        if( $doInitDatabase )
        {
            $tmp = $server->NewDatabase( kDB );
            $tmp->Drop();

        } // Initialise database.

        //
        // Instantiate wrapper.
        //
        $this->mDatabase = $server->NewWrapper( kDB );

        //
        // Cache data dictionary.
        // Only done when the database was re-initialised.
        //
        if( $doInitDatabase )
            $this->mDatabase->CacheDataDictionary();

    } // Constructor.



/*=======================================================================================
 *                                                                                      *
 *                  PUBLIC DATA DICTIONARY INITIALISATION INTERFACE                     *
 *                                                                                      *
 *======================================================================================*/



    /*===================================================================================
     *  InitBaseDescriptors                                                             *
     *==================================================================================*/

    /**
     * <h4>Initialise base descriptors.</h4>
     *
     * This method will:
     *
     * <ul>
     *  <li>Store the DHS namespace term and keep it in {@link $mNamespace}.
     *  <li>Download the list of indicator fields from {@link kDHS_URL_INDICATORS}.
     *  <li>Store one descriptor per field (except <tt>Label</tt> and
     *      <tt>Definition</tt>, which are matched to the default name and description
     *      tags) and fill the descriptors match table.
     *  <li>Store the <tt>IndicatorType</tt> enumerated type with its elements and link
     *      it to the <tt>IndicatorType</tt> descriptor.
     * </ul>
     *
     * @throws RuntimeException If the indicator fields cannot be downloaded or decoded,
     *                          or if the service did not return the
     *                          <tt>IndicatorType</tt> field.
     */
    public function InitBaseDescriptors()
    {
        //
        // Init local storage.
        //
        $types = $this->mDatabase->NewTypesCollection();
        $terms = $this->mDatabase->NewTermsCollection();
        $descriptors = $this->mDatabase->NewDescriptorsCollection();

        //
        // Instantiate, store and remember namespace term.
        //
        $namespace = new \Milko\PHPLib\Term(
            $terms,
            [
                kTAG_LID => self::kDHS_NAMESPACE,
                kTAG_NAME => [ 'en' => 'Demographic and Health Surveys (DHS) Program' ],
                kTAG_DESCRIPTION => [ 'en' =>
                    'This namespace groups all metadata regarding the USAID ' .
                    'Demographic and Health Surveys.' ]
            ]
        );
        $namespace->Store();
        $this->mNamespace = $namespace;

        //
        // Load base descriptors.
        //
        $indicators = $this->loadIndicatorFields();

        //
        // Initialise match table.
        // Label and Definition map to the default name and description tags.
        //
        $this->mMatchTable = [ 'Label' => kTAG_NAME, 'Definition' => kTAG_DESCRIPTION ];

        //
        // Load match table.
        //
        $enumerated = [];
        foreach( $indicators as $indicator )
        {
            $field = $indicator[ 'fieldname' ];

            //
            // Skip fields already matched to default descriptors.
            //
            if( in_array( $field, [ 'Label', 'Definition' ], TRUE ) )
                continue;                                                   // =>

            //
            // Init descriptor.
            //
            $descriptor = new \Milko\PHPLib\Descriptor( $descriptors );

            //
            // Set identifiers.
            //
            $descriptor[ kTAG_NS ] = self::kDHS_NAMESPACE;
            $descriptor[ kTAG_LID ] = $field;
            $descriptor[ kTAG_SYMBOL ] = $field;

            //
            // Set names.
            // Tabs are stripped from the description provided by the service.
            //
            $descriptor[ kTAG_NAME ] = [ 'en' => $field ];
            $descriptor[ kTAG_DESCRIPTION ] =
                [ 'en' => str_replace( "\t", '', $indicator[ 'fieldDescription' ] ) ];

            //
            // Set data types.
            //
            $this->setDescriptorDataType( $descriptor, $field );

            //
            // Store descriptor.
            //
            $handle = $descriptor->Store();

            //
            // Set match table entry.
            //
            if( ! array_key_exists( $field, $this->mMatchTable ) )
                $this->mMatchTable[ $field ]
                    = $descriptor[ $descriptors->KeyOffset() ];

            //
            // Collect enumerated descriptor handles.
            //
            if( $field === 'IndicatorType' )
                $enumerated[ $field ] = $handle;

        } // Loading descriptors.

        //
        // Ensure the enumerated descriptor was provided,
        // before storing any enumeration term that would otherwise be orphaned.
        //
        if( ! array_key_exists( 'IndicatorType', $enumerated ) )
            throw new RuntimeException(
                "Missing [IndicatorType] field in [" .
                self::kDHS_URL_INDICATORS . "]." );                             // !@! ==>

        //
        // Store the indicator type enumeration and its elements.
        //
        $type = $this->storeIndicatorTypeEnumeration( $types, $terms );

        //
        // Link enumerations.
        //
        $pred =
            \Milko\PHPLib\Predicate::NewPredicate(
                $types, kPREDICATE_ENUM_OF, $type, $enumerated[ 'IndicatorType' ] );
        $pred->Store();

    } // InitBaseDescriptors.



/*=======================================================================================
 *                                                                                      *
 *                                  PROTECTED UTILITIES                                 *
 *                                                                                      *
 *======================================================================================*/



    /*===================================================================================
     *  loadIndicatorFields                                                             *
     *==================================================================================*/

    /**
     * <h4>Load the indicator fields list.</h4>
     *
     * Downloads {@link kDHS_URL_INDICATORS}, decodes the JSON payload and returns its
     * <tt>Data</tt> element.
     *
     * @return array The list of indicator field records.
     * @throws RuntimeException If the download fails, the payload is not valid JSON, or
     *                          it has no <tt>Data</tt> array.
     */
    protected function loadIndicatorFields()
    {
        //
        // Download.
        //
        $payload = file_get_contents( self::kDHS_URL_INDICATORS );
        if( $payload === FALSE )
            throw new RuntimeException(
                "Unable to load [" . self::kDHS_URL_INDICATORS . "]." );        // !@! ==>

        //
        // Decode.
        //
        $decoded = json_decode( $payload, TRUE );
        if( ! is_array( $decoded ) )
            throw new RuntimeException(
                "Invalid JSON from [" . self::kDHS_URL_INDICATORS . "]: " .
                json_last_error_msg() );                                        // !@! ==>

        //
        // Extract data.
        //
        if( (! array_key_exists( 'Data', $decoded ))
         || (! is_array( $decoded[ 'Data' ] )) )
            throw new RuntimeException(
                "Missing [Data] element in [" .
                self::kDHS_URL_INDICATORS . "]." );                             // !@! ==>

        return $decoded[ 'Data' ];                                          // ==>

    } // loadIndicatorFields.


    /*===================================================================================
     *  setDescriptorDataType                                                           *
     *==================================================================================*/

    /**
     * <h4>Set descriptor data type and kind.</h4>
     *
     * Sets the data type and data kind of the provided descriptor according to the DHS
     * field name. Field names not listed here leave the descriptor untouched, so such
     * descriptors are stored without data type and kind.
     *
     * @param \Milko\PHPLib\Descriptor $theDescriptor The descriptor to update.
     * @param string                   $theField      The DHS field name.
     */
    protected function setDescriptorDataType( $theDescriptor, $theField )
    {
        switch( $theField )
        {
            case 'IndicatorId':
            case 'IndicatorOldId':
            case 'Level1':
            case 'Level2':
            case 'Level3':
            case 'Denominator':
            case 'ShortName':
            case 'ByLabels':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_STRING;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_DISCRETE ];
                break;

            case 'IndicatorOrder':
            case 'NumberScale':
            case 'QuickStatOrder':
            case 'SDRID':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_INT;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_DISCRETE ];
                break;

            case 'MeasurementType':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_STRING;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_CATEGORICAL, kKIND_SUMMARY ];
                break;

            case 'IndicatorType':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_ENUM;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_CATEGORICAL ];
                break;

            case 'TagIds':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_ENUM_SET;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_CATEGORICAL ];
                break;

            case 'IsQuickStat':
                $theDescriptor[ kTAG_DATA_TYPE ] = kTYPE_BOOLEAN;
                $theDescriptor[ kTAG_DATA_KIND ] = [ kKIND_DISCRETE ];
                break;
        }

    } // setDescriptorDataType.


    /*===================================================================================
     *  storeIndicatorTypeEnumeration                                                   *
     *==================================================================================*/

    /**
     * <h4>Store the indicator type enumeration.</h4>
     *
     * Stores the <tt>IndicatorType</tt> type term in the DHS namespace, then stores one
     * term per indicator type code and relates each of them to the type term with the
     * {@link kPREDICATE_ENUM_OF} predicate.
     *
     * @param mixed $theTypes The collection in which predicates are stored.
     * @param mixed $theTerms The collection in which terms are stored.
     * @return mixed The value returned by storing the type term, used as the
     *               destination of the element predicates.
     */
    protected function storeIndicatorTypeEnumeration( $theTypes, $theTerms )
    {
        //
        // Instantiate indicator type type.
        //
        $enum_type = new \Milko\PHPLib\Term( $theTerms );
        $enum_type[ kTAG_NS ] = self::kDHS_NAMESPACE;
        $enum_type[ kTAG_LID ] = 'IndicatorType';
        $enum_type[ kTAG_NODE_KIND ] = [ kKIND_TYPE ];
        $enum_type[ kTAG_NAME ] = [ 'en' => "Indicator types" ];
        $dst = $enum_type->Store();

        //
        // Set indicator type enumerations.
        // Keys are the DHS codes, values the English labels.
        //
        $enums = [
            'I' => 'Indicator',
            'D' => 'Weighted denominator',
            'U' => 'Unweighted denominator',
            'T' => 'Distribution total (100%)',
            'S' => 'Special answers (don\'t know/missing)',
            'E' => 'Sampling errors',
            'C' => 'Confidence interval'
        ];
        foreach( $enums as $key => $name )
        {
            //
            // Store element in the namespace of the type term.
            //
            $term = new \Milko\PHPLib\Term( $theTerms );
            $term[ kTAG_NS ] = $enum_type[ $theTerms->KeyOffset() ];
            $term[ kTAG_LID ] = $key;
            $term[ kTAG_NAME ] = [ 'en' => $name ];
            $src = $term->Store();

            //
            // Relate element to type.
            //
            $pred =
                \Milko\PHPLib\Predicate::NewPredicate(
                    $theTypes, kPREDICATE_ENUM_OF, $src, $dst );
            $pred->Store();
        }

        return $dst;                                                        // ==>

    } // storeIndicatorTypeEnumeration.



} // class DHS.
390+
391+
392+
?>

0 commit comments

Comments
 (0)