Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(#373): merge-contacts action #647

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ upload-docs.*.log.json
/.vscode/
/.idea/
/.settings/
/json_docs/
*.swp
coverage
.nyc_output
Expand Down
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"json-diff": "^1.0.6",
"json-stringify-safe": "^5.0.1",
"json2csv": "^4.5.4",
"lodash": "^4.17.21",
"mime-types": "^2.1.35",
"minimist": "^1.2.8",
"mkdirp": "^3.0.1",
Expand Down
68 changes: 68 additions & 0 deletions src/fn/merge-contacts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
const minimist = require('minimist');
const path = require('path');

const environment = require('../lib/environment');
const pouch = require('../lib/db');
const { info } = require('../lib/log');

const HierarchyOperations = require('../lib/hierarchy-operations');

module.exports = {
requiresInstance: true,
execute: () => {
const args = parseExtraArgs(environment.pathToProject, environment.extraArgs);
const db = pouch();
const options = {
docDirectoryPath: args.docDirectoryPath,
force: args.force,
};
return HierarchyOperations(options, db).merge(args.sourceIds, args.destinationId);
}
};

/**
 * Parses the extra command-line arguments of the merge-contacts action.
 *
 * Prints the usage text and throws when `--keep` or `--remove` is missing.
 *
 * @param {string} projectDir - base directory used to resolve `--docDirectoryPath`
 * @param {string[]} [extraArgs=[]] - raw arguments to hand to minimist
 * @returns {{destinationId: string, sourceIds: string[], docDirectoryPath: string, force: boolean}}
 * @throws {Error} when `--keep` or `--remove` carries no usable value
 */
const parseExtraArgs = (projectDir, extraArgs = []) => {
  // With `boolean: true` a bare `--remove` would be parsed as `true` and numeric-looking IDs
  // would be coerced to numbers. Declaring these options as strings keeps the values as typed.
  const args = minimist(extraArgs, {
    boolean: true,
    string: ['keep', 'remove', 'docDirectoryPath'],
  });

  // Anything that is still not a string (e.g. `--keep true`) is treated as "not provided"
  // so the caller sees the usage text instead of a TypeError or a non-string ID.
  const destinationId = typeof args.keep === 'string' ? args.keep : '';

  // A repeated `--remove` yields an array; fold every string value into one comma-delimited list.
  const sourceIds = [].concat(args.remove)
    .filter(value => typeof value === 'string')
    .join(',')
    .split(',')
    .filter(Boolean);

  if (!destinationId) {
    usage();
    throw Error(`Action "merge-contacts" is missing required contact ID ${bold('--keep')}. Other contacts will be merged into this contact.`);
  }

  if (sourceIds.length === 0) {
    usage();
    throw Error(`Action "merge-contacts" is missing required contact ID(s) ${bold('--remove')}. These contacts will be merged into the contact specified by ${bold('--keep')}`);
  }

  return {
    destinationId,
    sourceIds,
    docDirectoryPath: path.resolve(projectDir, args.docDirectoryPath || 'json_docs'),
    force: !!args.force,
  };
};

// Wraps `text` in the escape sequences \x1b[1m ... \x1b[0m.
const bold = (text) => {
  const prefix = '\x1b[1m';
  const suffix = '\x1b[0m';
  return `${prefix}${text}${suffix}`;
};
// Prints the help text for the merge-contacts action through info(): a short summary,
// the USAGE line and the OPTIONS list (--keep, --remove, --docDirectoryPath).
// NOTE(review): the span from "Copy link" through "Welcome suggestions that seem intuitive."
// sits inside the template literal and reads like review-page text rather than intended
// help output — confirm before relying on this string.
const usage = () => {
info(`
${bold('cht-conf\'s merge-contacts action')}
When combined with 'upload-docs' this action merges multiple contacts and all their associated data into one.

${bold('USAGE')}
cht --local merge-contacts -- --keep=<keep_id> --remove=<remove_id1>,<remove_id2>
Copy link
Member Author

@kennsippell kennsippell Nov 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Struggling with the interface here. I've gone through:

  • sources/destination
  • remove/keep
  • losers/winner

Welcome suggestions that seem intuitive.


${bold('OPTIONS')}
--keep=<keep_id>
Specifies the ID of the contact that should have all other contact data merged into it.

--remove=<remove_id1>,<remove_id2>
A comma delimited list of IDs of contacts which will be deleted and all of their data will be merged into the keep contact.

--docDirectoryPath=<path to stage docs>
Specifies the folder used to store the documents representing the changes in hierarchy.
`);
};
249 changes: 13 additions & 236 deletions src/fn/move-contacts.js
Original file line number Diff line number Diff line change
@@ -1,105 +1,34 @@
const minimist = require('minimist');
const path = require('path');
const userPrompt = require('../lib/user-prompt');

const environment = require('../lib/environment');
const fs = require('../lib/sync-fs');
const lineageManipulation = require('../lib/lineage-manipulation');
const lineageConstraints = require('../lib/lineage-constraints');
const pouch = require('../lib/db');
const { warn, trace, info } = require('../lib/log');
const { info } = require('../lib/log');

const HIERARCHY_ROOT = 'root';
const BATCH_SIZE = 10000;
const HierarchyOperations = require('../lib/hierarchy-operations');

// Action definition for move-contacts: parses the extra arguments, opens the database
// via pouch() and stages the hierarchy changes.
// NOTE(review): this span appears to contain both the pre-change and post-change lines of a
// diff. As written, execute() returns at updateLineagesAndStage(...) and the
// HierarchyOperations(...).move(...) call below it is never reached — confirm which
// version is intended.
module.exports = {
requiresInstance: true,
execute: () => {
const args = parseExtraArgs(environment.pathToProject, environment.extraArgs);
const db = pouch();
prepareDocumentDirectory(args);
return updateLineagesAndStage(args, db);
// Unreachable as written (follows the return above).
const options = {
docDirectoryPath: args.docDirectoryPath,
force: args.force,
};
return HierarchyOperations(options, db).move(args.sourceIds, args.destinationId);
}
};

// Formats a document for log/error messages as: 'name' (_id)
const prettyPrintDocument = (doc) => {
  const quotedName = `'${doc.name}'`;
  const bracketedId = `(${doc._id})`;
  return `${quotedName} ${bracketedId}`;
};
/**
 * Stages the lineage updates needed to move `options.contactIds` under `options.parentId`.
 *
 * For every contact being moved: checks primary-contact constraints, rewrites the lineage
 * of the contact and its descendants, refreshes primary-contact references held by its
 * ancestors, writes the changed docs to disk and updates the affected reports.
 *
 * @param {object} options - parsed arguments; reads `parentId` and `contactIds` and is passed
 *   on to the disk-writing helpers
 * @param {object} db - database handle passed to the fetch helpers
 * @returns {Promise<void>}
 * @throws {Error} when a moved contact is a required primary contact
 */
const updateLineagesAndStage = async (options, db) => {
  trace(`Fetching contact details for parent: ${options.parentId}`);
  const parentDoc = await fetch.contact(db, options.parentId);

  const constraints = await lineageConstraints(db, parentDoc);
  const contactDocs = await fetch.contactList(db, options.contactIds);
  await validateContacts(contactDocs, constraints);

  const replacementLineage = lineageManipulation.createLineageFromDoc(parentDoc);
  let totalContacts = 0;
  let totalReports = 0;

  for (const contactId of options.contactIds) {
    const contactDoc = contactDocs[contactId];
    const descendantsAndSelf = await fetch.descendantsOf(db, contactId);

    // A primary contact may not be moved out of a place that requires them.
    const invalidPrimaryContactDoc = await constraints.getPrimaryContactViolations(contactDoc, descendantsAndSelf);
    if (invalidPrimaryContactDoc) {
      throw Error(`Cannot remove contact ${prettyPrintDocument(invalidPrimaryContactDoc)} from the hierarchy for which they are a primary contact.`);
    }

    const contactLabel = prettyPrintDocument(contactDoc);

    trace(`Considering lineage updates to ${descendantsAndSelf.length} descendant(s) of contact ${contactLabel}.`);
    const updatedDescendants = replaceLineageInContacts(descendantsAndSelf, replacementLineage, contactId);

    const ancestors = await fetch.ancestorsOf(db, contactDoc);
    trace(`Considering primary contact updates to ${ancestors.length} ancestor(s) of contact ${contactLabel}.`);
    const updatedAncestors = replaceLineageInAncestors(descendantsAndSelf, ancestors);

    minifyLineageAndWriteToDisk([...updatedDescendants, ...updatedAncestors], options);

    const movedReportsCount = await moveReports(db, descendantsAndSelf, options, replacementLineage, contactId);
    trace(`${movedReportsCount} report(s) created by these affected contact(s) will be updated`);

    totalContacts += updatedDescendants.length + updatedAncestors.length;
    totalReports += movedReportsCount;

    info(`Staged updates to ${contactLabel}. ${updatedDescendants.length} contact(s) and ${movedReportsCount} report(s).`);
  }

  info(`Staged changes to lineage information for ${totalContacts} contact(s) and ${totalReports} report(s).`);
};

/**
 * Checks for any errors which the move would create in the hierarchy and confirms
 * that the list of contacts is possible to move.
 *
 * @param {Object<string, object>} contactDocs - contact documents keyed by their id
 * @param {object} constraints - object exposing `getHierarchyErrors(doc)`
 * @returns {Promise<void>} rejects with an Error describing the first violation found
 */
const validateContacts = async (contactDocs, constraints) => {
  const docs = Object.values(contactDocs);

  for (const doc of docs) {
    const hierarchyError = constraints.getHierarchyErrors(doc);
    if (hierarchyError) {
      throw Error(`Hierarchy Constraints: ${hierarchyError}`);
    }
  }

  /*
  It is nice that the tool can move lists of contacts as one operation, but strange things happen when two contactIds are in the same lineage.
  For example, moving a district_hospital and moving a contact under that district_hospital to a new clinic causes multiple colliding writes to the same json file.
  */
  // A Set gives constant-time membership checks instead of scanning the id list for every ancestor.
  const movingIds = new Set(Object.keys(contactDocs));
  for (const doc of docs) {
    const parentIdsOfDoc = (doc.parent && lineageManipulation.pluckIdsFromLineage(doc.parent)) || [];
    const violatingParentId = parentIdsOfDoc.find(parentId => movingIds.has(parentId));
    if (violatingParentId) {
      throw Error(`Unable to move two documents from the same lineage: '${doc._id}' and '${violatingParentId}'`);
    }
  }
};

// Parses extraArgs and asserts if required parameters are not present
const parseExtraArgs = (projectDir, extraArgs = []) => {
const args = minimist(extraArgs, { boolean: true });

const contactIds = (args.contacts || args.contact || '')
const sourceIds = (args.contacts || args.contact || '')
.split(',')
.filter(id => id);

if (contactIds.length === 0) {
if (sourceIds.length === 0) {
usage();
throw Error('Action "move-contacts" is missing required list of contacts to be moved');
}
Expand All @@ -110,28 +39,15 @@ const parseExtraArgs = (projectDir, extraArgs = []) => {
}

return {
parentId: args.parent,
contactIds,
destinationId: args.parent,
sourceIds,
docDirectoryPath: path.resolve(projectDir, args.docDirectoryPath || 'json_docs'),
force: !!args.force,
};
};

/**
 * Makes sure the staging folder exists and, unless `force` is set, asks before
 * reusing a folder that already contains files.
 *
 * @param {{docDirectoryPath: string, force: boolean}} args
 * @throws {Error} when the user declines to empty a non-empty folder
 */
const prepareDocumentDirectory = ({ docDirectoryPath, force }) => {
  if (!fs.exists(docDirectoryPath)) {
    fs.mkdir(docDirectoryPath);
    return;
  }

  // With `force` the existing contents are left alone and no prompt is shown.
  const needsConfirmation = !force && fs.recurseFiles(docDirectoryPath).length > 0;
  if (!needsConfirmation) {
    return;
  }

  warn(`The document folder '${docDirectoryPath}' already contains files. It is recommended you start with a clean folder. Do you want to delete the contents of this folder and continue?`);
  if (!userPrompt.keyInYN()) {
    throw new Error('User aborted execution.');
  }

  fs.deleteFilesInFolder(docDirectoryPath);
};

// Wraps `text` in the escape sequences \x1b[1m ... \x1b[0m.
const bold = (text) => {
  const prefix = '\x1b[1m';
  const suffix = '\x1b[0m';
  return `${prefix}${text}${suffix}`;
};
const usage = () => {
const bold = text => `\x1b[1m${text}\x1b[0m`;
info(`
${bold('cht-conf\'s move-contacts action')}
When combined with 'upload-docs' this action effectively moves a contact from one place in the hierarchy to another.
Expand All @@ -144,148 +60,9 @@ ${bold('OPTIONS')}
A comma delimited list of ids of contacts to be moved.

--parent=<parent_id>
Specifies the ID of the new parent. Use '${HIERARCHY_ROOT}' to identify the top of the hierarchy (no parent).
Specifies the ID of the new parent. Use '${HierarchyOperations.HIERARCHY_ROOT}' to identify the top of the hierarchy (no parent).

--docDirectoryPath=<path to stage docs>
Specifies the folder used to store the documents representing the changes in hierarchy.
`);
};

/**
 * Updates, batch by batch, the reports created by the given contacts and writes the
 * changed reports to disk.
 *
 * @param {object} db - database handle passed to `fetch.reportsCreatedBy`
 * @param {object[]} descendantsAndSelf - contact docs whose `_id`s select the reports
 * @param {object} writeOptions - forwarded to `minifyLineageAndWriteToDisk`
 * @param {object} replacementLineage - lineage passed to `replaceLineageInReports`
 * @param {string} contactId - id passed to `replaceLineageInReports`
 * @returns {Promise<number>} total number of report docs fetched
 */
const moveReports = async (db, descendantsAndSelf, writeOptions, replacementLineage, contactId) => {
  const contactIds = descendantsAndSelf.map(contact => contact._id);

  let processedCount = 0;
  let hasMoreBatches = true;
  while (hasMoreBatches) {
    info(`Processing ${processedCount} to ${processedCount + BATCH_SIZE} report docs`);
    const batch = await fetch.reportsCreatedBy(db, contactIds, processedCount);

    minifyLineageAndWriteToDisk(
      replaceLineageInReports(batch, replacementLineage, contactId),
      writeOptions
    );

    processedCount += batch.length;
    // A batch shorter than BATCH_SIZE means there is nothing left to fetch.
    hasMoreBatches = batch.length >= BATCH_SIZE;
  }

  return processedCount;
};

// For each doc in order: minify its lineages in place, then write it to disk.
const minifyLineageAndWriteToDisk = (docs, parsedArgs) => {
  for (const doc of docs) {
    lineageManipulation.minifyLineagesInDoc(doc);
    writeDocumentToDisk(parsedArgs, doc);
  }
};

/**
 * Writes `doc` as JSON to `<docDirectoryPath>/<doc._id>.doc.json`, warning first when a
 * file already exists at that path.
 */
const writeDocumentToDisk = ({ docDirectoryPath }, doc) => {
  const fileName = `${doc._id}.doc.json`;
  const destinationPath = path.join(docDirectoryPath, fileName);

  const willOverwrite = fs.exists(destinationPath);
  if (willOverwrite) {
    warn(`File at ${destinationPath} already exists and is being overwritten.`);
  }

  trace(`Writing updated document to ${destinationPath}`);
  fs.writeJson(destinationPath, doc);
};

// Database read helpers used while staging hierarchy changes.
const fetch = {
  /**
   * Fetches all of the documents associated with `ids` and confirms they exist.
   *
   * @param {object} db - database handle exposing `allDocs`
   * @param {string[]} ids - ids of the contacts to load
   * @returns {Promise<Object<string, object>>} the documents keyed by their `_id`
   * @throws {Error} listing every id for which no document was returned
   */
  contactList: async (db, ids) => {
    const contactDocs = await db.allDocs({
      keys: ids,
      include_docs: true,
    });

    const missingContactErrors = contactDocs.rows
      .filter(row => !row.doc)
      .map(row => `Contact with id '${row.key}' could not be found.`);
    if (missingContactErrors.length > 0) {
      // Join explicitly rather than relying on Error() stringifying an array.
      throw Error(missingContactErrors.join(','));
    }

    const docsById = {};
    for (const row of contactDocs.rows) {
      docsById[row.doc._id] = row.doc;
    }
    return docsById;
  },

  /**
   * Fetches a single contact document.
   *
   * @param {object} db - database handle exposing `get`
   * @param {string} id - contact id, or HIERARCHY_ROOT
   * @returns {Promise<object|undefined>} the document, or undefined for HIERARCHY_ROOT
   * @throws {Error} a "could not be found" error when `db.get` fails with name 'not_found';
   *   any other error is rethrown unchanged
   */
  contact: async (db, id) => {
    if (id === HIERARCHY_ROOT) {
      return undefined;
    }

    try {
      return await db.get(id);
    } catch (err) {
      if (err.name !== 'not_found') {
        throw err;
      }

      throw Error(`Contact with id '${id}' could not be found`);
    }
  },

  /**
   * Given a contact's id, obtains the documents returned for it by the
   * 'medic/contacts_by_depth' view.
   *
   * @param {object} db - database handle exposing `query`
   * @param {string} contactId
   * @returns {Promise<object[]>} the documents, without missing docs and tombstones
   */
  descendantsOf: async (db, contactId) => {
    const descendantDocs = await db.query('medic/contacts_by_depth', {
      key: [contactId],
      include_docs: true,
    });

    return descendantDocs.rows
      .map(row => row.doc)
      /* We should not move or update tombstone documents */
      .filter(doc => doc && doc.type !== 'tombstone');
  },

  /**
   * Fetches one batch (at most BATCH_SIZE rows) of report documents from the
   * 'medic-client/reports_by_freetext' view for the keys `contact:<id>`.
   *
   * @param {object} db - database handle exposing `query`
   * @param {string[]} contactIds
   * @param {number} skip - number of rows to skip
   * @returns {Promise<object[]>}
   */
  reportsCreatedBy: async (db, contactIds, skip) => {
    const reports = await db.query('medic-client/reports_by_freetext', {
      keys: contactIds.map(id => [`contact:${id}`]),
      include_docs: true,
      limit: BATCH_SIZE,
      skip,
    });

    return reports.rows.map(row => row.doc);
  },

  /**
   * Fetches the documents of every id found in `contactDoc.parent`.
   *
   * @param {object} db - database handle exposing `allDocs`
   * @param {object} contactDoc
   * @returns {Promise<object[]>}
   * @throws {Error} when one or more of those documents are missing
   */
  ancestorsOf: async (db, contactDoc) => {
    const ancestorIds = lineageManipulation.pluckIdsFromLineage(contactDoc.parent);
    const ancestors = await db.allDocs({
      keys: ancestorIds,
      include_docs: true,
    });

    const ancestorIdsNotFound = ancestors.rows
      .filter(ancestor => !ancestor.doc)
      .map(ancestor => ancestor.key);
    if (ancestorIdsNotFound.length > 0) {
      // prettyPrintDocument already quotes the name, so no extra quote is added around it.
      throw Error(`Contact ${prettyPrintDocument(contactDoc)} has parent id(s) '${ancestorIdsNotFound.join(',')}' which could not be found.`);
    }

    return ancestors.rows.map(ancestor => ancestor.doc);
  },
};

/**
 * Rewrites the 'contact' lineage of each report (in place) and returns, in their original
 * order, only the reports for which `lineageManipulation.replaceLineage` returned a truthy value.
 */
const replaceLineageInReports = (reportsCreatedByDescendants, replaceWith, startingFromIdInLineage) => {
  const wasUpdated = report => lineageManipulation.replaceLineage(report, 'contact', replaceWith, startingFromIdInLineage);
  return reportsCreatedByDescendants.filter(wasUpdated);
};

/**
 * Rewrites the 'parent' and 'contact' lineages of each contact (in place) and returns only
 * the contacts for which at least one of the two rewrites reported a change.
 */
const replaceLineageInContacts = (descendantsAndSelf, replacementLineage, contactId) => descendantsAndSelf.filter(doc => {
  // For the moved contact itself the parent rewrite gets no starting id; for every
  // other doc it starts from the moved contact's id.
  const isMovedContact = doc._id === contactId;
  const parentStartId = isMovedContact ? undefined : contactId;

  // Both rewrites must always run, so evaluate them before combining the results.
  const parentWasUpdated = lineageManipulation.replaceLineage(doc, 'parent', replacementLineage, parentStartId);
  const contactWasUpdated = lineageManipulation.replaceLineage(doc, 'contact', replacementLineage, contactId);
  return parentWasUpdated || contactWasUpdated;
});

/**
 * For every ancestor whose `contact._id` matches one of `descendantsAndSelf`, replaces
 * `ancestor.contact` (in place) with a lineage created from that matching doc.
 *
 * @returns {object[]} the updated ancestors, most recently matched first
 */
const replaceLineageInAncestors = (descendantsAndSelf, ancestors) => {
  const updatedAncestors = [];

  ancestors.forEach(ancestor => {
    const primaryContact = descendantsAndSelf.find(descendant => ancestor.contact && descendant._id === ancestor.contact._id);
    if (!primaryContact) {
      return;
    }

    ancestor.contact = lineageManipulation.createLineageFromDoc(primaryContact);
    // Prepend so later matches come first in the result.
    updatedAncestors.unshift(ancestor);
  });

  return updatedAncestors;
};
Loading