From 23021888ece66a92dfd1ee7106046eb3f9ab0c29 Mon Sep 17 00:00:00 2001 From: David Bruant Date: Thu, 8 Apr 2021 01:55:04 +0200 Subject: [PATCH] Improv algo (#48) * suppression du badge de build status de Travis CI * Rewrite tests to be more demanding * Fix test and behavior when there is no CodNatJurBenefCA to anonymize the LibOrgaBenef * bump xmldom * Change occultation string to something more generic Also, make it an argument to the anonymize function with a default value * PJRef > NomPJ elements are anonymized * Occultation of all Champ_Editeurs * anonymise all Proprietaires in all MEMBREASAs * 1.2.0 --- README.md | 2 +- index.js | 36 +++++++++++---- package-lock.json | 8 ++-- package.json | 4 +- test/index.js | 111 +++++++++++++++++++++++++++++++++++++++------- 5 files changed, 130 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 62da052..e9a1717 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# anonymisation-document-budgetaire [![Build Status](https://travis-ci.org/dtc-innovation/anonymisation-document-budgetaire.svg?branch=master)](https://travis-ci.org/dtc-innovation/anonymisation-document-budgetaire) +# anonymisation-document-budgetaire Outil d'anonymisation de fichiers DocumentBudgetaire diff --git a/index.js b/index.js index 4452507..c57a489 100644 --- a/index.js +++ b/index.js @@ -1,27 +1,47 @@ -const ANONYMIZED_NAME = "Nom anonymisé"; - // http://odm-budgetaire.org/doc-schema/CommunAnnexe_xsd_Complex_Type_ATCodNatJurBenef.html#ATCodNatJurBenef_V const CodNatJurBenefPersonnesPhysiques = 'P3'; -export default function(doc){ +export default function(doc, OCCULTATION_STRING = "Occultation Article L311-7 CRPA"){ const concours = Array.from(doc.getElementsByTagName('CONCOURS')); - concours.forEach(c => { const natJurEl = c.getElementsByTagName('CodNatJurBenefCA')[0]; - if(natJurEl && natJurEl.getAttribute('V') === CodNatJurBenefPersonnesPhysiques){ + if(!natJurEl || natJurEl.getAttribute('V') === CodNatJurBenefPersonnesPhysiques){ const 
libOrgaBenef = c.getElementsByTagName('LibOrgaBenef')[0]; - libOrgaBenef.setAttribute('V', ANONYMIZED_NAME); + libOrgaBenef.setAttribute('V', OCCULTATION_STRING); } }) const prets = Array.from(doc.getElementsByTagName('PRET')) - prets.forEach(c => { const nomBenefPret = c.getElementsByTagName('NomBenefPret')[0]; if(nomBenefPret){ - nomBenefPret.setAttribute('V', ANONYMIZED_NAME); + nomBenefPret.setAttribute('V', OCCULTATION_STRING); + } + }) + + const BlocBudget = doc.getElementsByTagName('BlocBudget')[0]; + const PJRefs = Array.from(BlocBudget.getElementsByTagName('PJRef')); + PJRefs.forEach(pjref => { + const NomPJ = pjref.getElementsByTagName('NomPJ')[0]; + + if(NomPJ){ + NomPJ.setAttribute('V', OCCULTATION_STRING); + } + }) + + const Champ_Editeurs = Array.from(doc.getElementsByTagName('Champ_Editeur')); + Champ_Editeurs.forEach(ce => { + ce.setAttribute('V', OCCULTATION_STRING); + }) + + const MEMBREASAs = Array.from(doc.getElementsByTagName('MEMBREASA')) + MEMBREASAs.forEach(masa => { + const Proprietaire = masa.getElementsByTagName('Proprietaire')[0]; + + if(Proprietaire){ + Proprietaire.setAttribute('V', OCCULTATION_STRING); } }) } \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 4a27a0a..5809486 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "anon-doc-budg", - "version": "1.1.1", + "version": "1.2.0", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -5115,9 +5115,9 @@ } }, "xmldom": { - "version": "0.1.27", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", - "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=" + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.5.0.tgz", + "integrity": "sha512-Foaj5FXVzgn7xFzsKeNIde9g6aFBxTPi37iwsno8QvApmtg7KYrr+OPyRHcJF7dud2a5nGRBXK3n0dL62Gf7PA==" }, "xtend": { "version": "4.0.1", diff --git a/package.json b/package.json index c96c39b..68826fd 100644 --- a/package.json +++ b/package.json @@ -1,6 
+1,6 @@ { "name": "anon-doc-budg", - "version": "1.1.1", + "version": "1.2.0", "description": "Outil d'anonymisation de fichiers DocumentBudgetaire", "type": "module", "engines": { @@ -23,7 +23,7 @@ "fs-extra": "^7.0.1", "p-limit": "^3.1.0", "xml-buffer-tostring": "^0.2.0", - "xmldom": "^0.1.27" + "xmldom": "^0.5.0" }, "devDependencies": { "babel-core": "^6.26.3", diff --git a/test/index.js b/test/index.js index 057ae23..a276147 100644 --- a/test/index.js +++ b/test/index.js @@ -4,8 +4,17 @@ import {DOMParser, XMLSerializer} from 'xmldom'; import anonymize from '../index.js'; +const defaultBlocBudget = ` + + + + + + +` -function makeDocBudg(annexes){ + +function makeDocBudg(annexes = '', blocBudget = defaultBlocBudget){ const xmlStr = ` @@ -23,14 +32,7 @@ function makeDocBudg(annexes){ - - - - - - - - + ${blocBudget} @@ -76,7 +78,7 @@ describe('anonymize', () => { anonymize(doc); - expect( doc.getElementsByTagName('LibOrgaBenef')[0].getAttribute('V') ).to.not.equal(NAME); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(NAME); }) it('should anonymize the document if there are several physical person names', () => { @@ -105,10 +107,10 @@ describe('anonymize', () => { const libOrgaBenefs = doc.getElementsByTagName('LibOrgaBenef'); expect( libOrgaBenefs[0].getAttribute('V') ).to.equal( libOrgaBenefs[1].getAttribute('V') ); - expect( libOrgaBenefs[0].getAttribute('V') ).to.not.equal( NAME_1 ); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(NAME_1); }) - it('should not do anything if there is no in the ', () => { + it('should anonymize if there is no in the ', () => { const NAME = "Asso dtc"; const annexes = ` @@ -124,7 +126,7 @@ describe('anonymize', () => { anonymize(doc); - expect( doc.getElementsByTagName('LibOrgaBenef')[0].getAttribute('V') ).to.equal(NAME); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(NAME); }) it('should not do anything if there are subs, but no physical person', () => { 
@@ -187,7 +189,7 @@ describe('anonymize', () => { }) - it(`should anonimize all s in all s`, () => { + it(`should anonymize all s in all s`, () => { const NAME = 'David Bruant'; const annexes = ` @@ -207,9 +209,86 @@ describe('anonymize', () => { anonymize(doc); - const nomBenefPret = doc.getElementsByTagName('NomBenefPret')[0].getAttribute('V') + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(NAME); + }) + + + it('should occult all Budget > BlocBudget > PJRef > NomPJ[V]', () => { + const PJ_NAME_1 = "Yo.pdf"; + const PJ_NAME_2 = "Document joint"; + + const blocBudget = ` + + + + + + + + + + + + + ` + + const doc = makeDocBudg(undefined, blocBudget); + + anonymize(doc); + + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(PJ_NAME_1); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(PJ_NAME_2); + }) + + + it('should occult all Champ_Editeur[V]', () => { + const CHAMP_EDITEUR_1 = "srbdtyndu,yu"; + const CHAMP_EDITEUR_2 = "16151651681653"; + const CHAMP_EDITEUR_3 = "ù$*ù*ù$"; + + const annexes = ` + + + + + + + + + + + + + + + + `; + + const doc = makeDocBudg(annexes); + + anonymize(doc); + + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(CHAMP_EDITEUR_1); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(CHAMP_EDITEUR_2); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(CHAMP_EDITEUR_3); + }) + + it(`should anonymize all s in all s`, () => { + const NAME = 'David Bruant'; + + const annexes = ` + + + + + + `; + + const doc = makeDocBudg(annexes); + + anonymize(doc); - expect( nomBenefPret ).to.not.equal(NAME); + expect( (new XMLSerializer()).serializeToString(doc) ).to.not.include(NAME); }) });