Skip to content

Commit 8278834

Browse files
committed
initial commit
1 parent 8b57792 commit 8278834

9 files changed

+1528
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
scripts
2-
=======
2+
=======
3+
Repo for various robertson library scripts.

drush/.DS_Store

6 KB
Binary file not shown.
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
<?php
2+
3+
/*
4+
* This file should be put in the islandora modules plugins directory
5+
* typical usage: drush -u 1 islandora_lowercase_hocr path_to_query_file TRUE
6+
* the above would give you an interactive update for each book. If TRUE is changed to FALSE it will run against all
7+
* books returned from the query
8+
*
9+
* If this is being run from a multi site env. you will probably have to use the --uri switch
10+
* 'drush -u 1 --uri=http://localhost islandora_lowercase_hocr /path/to/query.txt TRUE'. You should also be in the subsite's
11+
* sites directory or a subdirectory of it.
12+
*
13+
*/
14+
15+
/**
 * Drush command hook: declares the islandora_lowercase_hocr command.
 *
 * @return array
 *   Command definitions keyed by command name. The single entry carries the
 *   description, argument help, an example invocation, an alias and the
 *   bootstrap level.
 */
function islandora_lowercase_hocr_drush_command() {
  $argument_help = array(
    'query_file' => 'The path to a plain text file that contains an itql query. This query should return a list of books (pids)',
    'interactive' => 'if TRUE then you will be asked to confirm the update for each book object'
  );

  $command = array();
  $command['description'] = "updates a books pages so the hocr datastream is all lower case";
  $command['arguments'] = $argument_help;
  $command['examples'] = array(
    'drush -u 1 --uri=http://localhost islandora_lowercase_hocr /path/to/query.txt TRUE',
  );
  $command['aliases'] = array('islandorlowerhocr');
  // Login-level bootstrap so the user id can be passed on the command line
  // with drush -u.
  $command['bootstrap'] = DRUSH_BOOTSTRAP_DRUPAL_LOGIN;

  return array('islandora_lowercase_hocr' => $command);
}
35+
36+
/**
 * Drush command callback for islandora_lowercase_hocr.
 *
 * Prints the current working directory, checks that a query file argument was
 * supplied and, if so, hands off to islandora_lowercase_hocr_doAction().
 *
 * Both parameters have defaults so that running the command with no arguments
 * reaches the "no query_file found" message instead of failing on a missing
 * argument before the check can run.
 *
 * @param string $query_file
 *   Path to a text file that contains an itql query. NULL or an empty string
 *   is treated as "not supplied".
 * @param string $interactive
 *   'TRUE' to confirm each book individually; anything else processes every
 *   book after the single up-front confirmation. Defaults to 'FALSE'.
 */
function drush_islandora_lowercase_hocr($query_file = NULL, $interactive = 'FALSE') {
  drush_print('Current working directory ' . getcwd());

  if (!isset($query_file) || $query_file === '') {
    drush_print(" no query_file found");
    return;
  }

  drush_print(" using query file" . $query_file);
  islandora_lowercase_hocr_doAction($query_file, $interactive);
}
58+
59+
/**
 * Runs the itql query and lower-cases the hOCR of every page of each book.
 *
 * Reads the query from $query_file, runs it, lists the returned book pids and
 * asks for one overall confirmation. Each book is then passed to
 * islandora_update_books_hocr_streams(), optionally after a per-book
 * confirmation.
 *
 * @param string $query_file
 *   Path to a readable local file holding the itql query.
 * @param string|bool $interactive
 *   'TRUE' (any letter case) or boolean TRUE asks for confirmation before each
 *   book; any other value processes all books without further prompts.
 */
function islandora_lowercase_hocr_doAction($query_file, $interactive) {
  module_load_include('inc', 'fedora_repository', 'api/fedora_item');

  // file_get_contents() signals failure with FALSE, which isset() cannot
  // detect, so the file is checked up front and the result compared strictly.
  $query = (is_file($query_file) && is_readable($query_file)) ? file_get_contents($query_file) : FALSE;
  if ($query === FALSE || trim($query) === '') {
    drush_die('no query found');
    return;
  }
  drush_print($query);

  $results = islandora_run_query($query);
  if (empty($results)) {
    // Nothing came back from the query runner; there is nothing to parse.
    drush_die('query returned no results');
    return;
  }

  $objects = islandora_sparql_results_as_array($results);
  foreach ($objects as $object) {
    drush_print($object);
  }

  $num = count($objects);
  if (!drush_confirm(dt('are you sure you want to update all pages in @num of books?', array('@num' => $num)))) {
    drush_die('Aborting.');
    return;
  }

  // Command-line arguments arrive as strings; accept 'TRUE' in any case as
  // well as a real boolean.
  $ask_per_book = ($interactive === TRUE)
    || (is_string($interactive) && strtoupper(trim($interactive)) === 'TRUE');

  foreach ($objects as $object) {
    if ($ask_per_book
      && !drush_confirm(dt('update all page hocr streams in this book @pid ?', array('@pid' => $object)))) {
      continue;
    }
    islandora_update_books_hocr_streams($object);
  }
}
100+
101+
/**
102+
* returns an array of pids
103+
* @todo pull this up into an api
104+
* @param SimpleXMLElement $content
105+
* @return array
106+
*/
107+
/**
108+
function islandora_sparql_results_as_array($content) {
109+
$content = new SimpleXMLElement($content);
110+
111+
$resultsarray = array();
112+
foreach ($content->results->result as $result) {
113+
$resultsarray[] = substr($result->object->attributes()->uri, 12); // Remove 'info:fedora/'.
114+
}
115+
return $resultsarray;
116+
} */
117+
118+
/**
 * Updates the ENCODED_OCR (hOCR) datastream of every page of a book.
 *
 * Looks up the page pids of the book and calls
 * islandora_update_page_hocr_streams() for each one, passing a running page
 * number that starts at 1.
 *
 * @param string $book_pid
 *   Pid of the book whose pages should be processed.
 */
function islandora_update_books_hocr_streams($book_pid) {
  module_load_include('inc', 'islandora_book', 'book_pack_utils');
  module_load_include('inc', 'fedora_repository', 'api/fedora_item');
  module_load_include('inc', 'islandora_fedora_api', 'islandora_fedora_api.raw');

  $page_pids = islandora_update_book_rels_get_page_pids($book_pid);
  if (!is_array($page_pids) || count($page_pids) === 0) {
    // Nothing to iterate over; say so instead of falling into foreach with a
    // non-array value.
    drush_print('No pages found for book ' . $book_pid);
    return;
  }

  $page_num = 1;
  foreach ($page_pids as $pid) {
    islandora_update_page_hocr_streams($book_pid, $pid, $page_num++);
    // Pause 0.2 seconds to give the server some rest between calls.
    time_nanosleep(0, 200000000);
  }
}
158+
159+
/**
 * Rewrites one page's ENCODED_OCR datastream through the lower-casing XSLT.
 *
 * Fetches the page's ENCODED_OCR datastream, transforms it with
 * xslt/hocrToLowerhocr.xsl from the islandora_bookviewer module and writes
 * the result back to the same datastream. Every failure is reported with
 * drush_print() and ends processing of this page only, so a batch run can
 * carry on with the next page.
 *
 * @param string $book_pid
 *   Pid of the owning book. Not used by this function; kept so existing
 *   callers keep working.
 * @param string $pid
 *   Pid of the page object to update.
 * @param int $page_number
 *   Page sequence number. Not used by this function; kept so existing callers
 *   keep working.
 */
function islandora_update_page_hocr_streams($book_pid, $pid, $page_number) {
  $item = new Fedora_Item($pid);

  try {
    $hocr = $item->get_datastream_dissemination('ENCODED_OCR');
  }
  catch (Exception $e) {
    // Report what actually failed and skip this page rather than terminating
    // the whole run.
    drush_print('Error loading ENCODED_OCR datastream for ' . $pid . ': ' . $e->getMessage());
    $item->forget();
    return;
  }

  $stylesheet_path = drupal_get_path('module', 'islandora_bookviewer') . '/xslt/hocrToLowerhocr.xsl';
  $stylesheet = new DomDocument();
  $error = FALSE;
  if (!$stylesheet->load($stylesheet_path)) {
    drush_print ('Error loading style sheet when processing '.$pid);
    $error = TRUE;
  }
  if (empty($hocr)) {
    drush_print('Error loading hocr data when processiong '.$pid);
    $error = TRUE;
  }
  if ($error) {
    $item->forget();
    return;
  }

  $input = new DomDocument();
  if (!$input->loadXML($hocr)) {
    drush_print('Error parsing hocr data when processing ' . $pid);
    $item->forget();
    return;
  }

  $proc = new XsltProcessor();
  if (!$proc->importStylesheet($stylesheet)) {
    drush_print('Error importing style sheet when processing ' . $pid);
    $item->forget();
    return;
  }

  // transformToDoc() yields FALSE when the transformation fails.
  $newdom = $proc->transformToDoc($input);
  if ($newdom === FALSE) {
    drush_print('Error transforming hocr data when processing ' . $pid);
    $item->forget();
    return;
  }

  $content = $newdom->saveXML();
  // Positional arguments after the mime type: FALSE, the log message, TRUE.
  $value = $item->modify_datastream($content, 'ENCODED_OCR', 'ENCODED_OCR', 'text/xml', FALSE, 'Modified by Islandora API using islandora_lowercase_hocr drush script', TRUE);
  if (isset($value)) {
    drush_print('successfully updated ENCODED_OCR for ' . $pid);
  }
  else {
    drush_print('Error updating ENCODED_OCR for ' . $pid);
  }
  $item->forget();
}
213+
214+
/**
215+
*
216+
* Returns the pids of all pages associated with a book object
217+
* @param string $book_pid
218+
* @return array
219+
*/
220+
/*
221+
function islandora_update_book_rels_get_page_pids($book_pid) {
222+
//return get_sorted_pages($book_pid); //not sure why we have two different functions
223+
$query = <<<XML
224+
select \$object \$title from <#ri>
225+
where (\$object <dc:title> \$title
226+
and \$object <fedora-rels-ext:isMemberOf> <info:fedora/$book_pid>
227+
and \$object <fedora-model:state> <info:fedora/fedora-system:def/model#Active>)
228+
order by \$title
229+
XML;
230+
231+
232+
module_load_include('inc', 'fedora_repository', 'CollectionClass');
233+
$collection = new CollectionClass($book_pid);
234+
$contents = $collection->getRelatedItems($book_pid, $query);
235+
$results_xml = simplexml_load_string($contents);
236+
$resultsarray = array();
237+
$book_pids = array();
238+
foreach ($results_xml->results->result as $result) {
239+
$book_pids[] = str_replace('info:fedora/', '', $result->object->attributes()->uri);
240+
}
241+
return $book_pids;
242+
} */
243+
244+
/**
245+
* @todo pull this function up into an api and remove from here
246+
* executes a supplied itql query
247+
* @param string $query
248+
* @return type
249+
*/
250+
/*
251+
function islandora_run_query($query) {
252+
module_load_include('inc', 'fedora_repository', 'CollectionClass');
253+
$cc = new CollectionClass();
254+
$results = $cc->getRelatedItems(NULL, $query, 1000000);
255+
if (isset($results)) {//still doesn't tell us if these are valid results
256+
return $results;
257+
}
258+
else {
259+
drush_print('Error get related items, relationships cannot be updated');
260+
}
261+
}*/
262+
263+
?>

0 commit comments

Comments
 (0)