Skip to content

Commit 1c26d47

Browse files
committed
fixed bug with list processing
1 parent cd2861a commit 1c26d47

File tree

4 files changed

+86
-57
lines changed

4 files changed

+86
-57
lines changed

EXAMPLE.ipynb

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@
4747
" None.\r\n",
4848
"\r\n",
4949
"Options:\r\n",
50-
" -v, --verbose\r\n",
50+
" -v, --verbose [x>=0]\r\n",
5151
" -q, --quiet TEXT\r\n",
5252
" --help Show this message and exit.\r\n",
5353
"\r\n",
5454
"Commands:\r\n",
55-
" flatten Flatten a file to TSV/CSV Example: jfl flatten --input my.yaml...\r\n",
56-
" unflatten Unflatten a file from TSV/CSV Example: jfl unflatten --input...\r\n"
55+
" flatten Flatten a file to TSV/CSV\r\n",
56+
" unflatten Unflatten a file from TSV/CSV\r\n"
5757
]
5858
}
5959
],
@@ -97,18 +97,15 @@
9797
" -i, --input PATH Input file, e.g. a SSSOM tsv file. [required]\r\n",
9898
" -I, --input-format TEXT The string denoting the input format, e.g.\r\n",
9999
" tsv,csv,yaml,json\r\n",
100-
"\r\n",
101100
" -o, --output TEXT Output file, e.g. a SSSOM tsv file.\r\n",
102101
" -t, --output-format TEXT Desired output format, e.g. tsv,csv,yaml,json\r\n",
103102
" -L, --multivalued-keys TEXT List of keys that are multivalued\r\n",
104103
" -F, --flatten-keys TEXT List of keys that are to be flattened\r\n",
105104
" -s, --serializer TEXT Serializer to use for complex keys\r\n",
106105
" -S, --serialized-keys TEXT List of keys that are to be serialized using\r\n",
107106
" the serializer\r\n",
108-
"\r\n",
109107
" -C, --config-key TEXT Key configuration. Must be of form\r\n",
110108
" KEY={yaml,json,flat,multivalued}*\r\n",
111-
"\r\n",
112109
" -c, --load-config TEXT Path to global configuration file to be loaded\r\n",
113110
" -O, --save-config TEXT Path to global configuration file to be saved\r\n",
114111
" -k, --key TEXT Key in root object to be used.\r\n",
@@ -309,23 +306,23 @@
309306
},
310307
{
311308
"cell_type": "code",
312-
"execution_count": 6,
309+
"execution_count": 5,
313310
"id": "4332920e",
314311
"metadata": {},
315312
"outputs": [
316313
{
317314
"name": "stdout",
318315
"output_type": "stream",
319316
"text": [
320-
"genres\tid\tname\tcreator_from_country\tcreator_name\tbooks_summary\tbooks_name\tbooks_price\tbooks_id\tcreator_genres\r",
317+
"genres\tid\tname\tcreator_from_country\tcreator_name\tbooks_name\tbooks_price\tbooks_id\tbooks_summary\tcreator_genres\r",
321318
"\r\n",
322-
"[fantasy]\tS001\tLord of the Rings\tEngland\tJRR Tolkein\t[Hobbits|More hobbits|Yet more hobbits]\t[Fellowship of the Ring|The Two Towers|Return of the King]\t[5.99|5.99|6.99]\t[S001.1|S001.2|S001.3]\t\r",
319+
"[fantasy]\tS001\tLord of the Rings\tEngland\tJRR Tolkein\t[Fellowship of the Ring|The Two Towers|Return of the King]\t[5.99|5.99|6.99]\t[S001.1|S001.2|S001.3]\t[Hobbits|More hobbits|Yet more hobbits]\t\r",
323320
"\r\n",
324-
"[scifi]\tS002\tThe Culture Series\tScotland\tIan M Banks\t\t[Consider Phlebas|Player of Games]\t[5.99|5.99]\t[S002.1|S002.2]\t\r",
321+
"[scifi]\tS002\tThe Culture Series\tScotland\tIan M Banks\t[Consider Phlebas|Player of Games]\t[5.99|5.99]\t[S002.1|S002.2]\t\t\r",
325322
"\r\n",
326-
"[scifi|fantasy]\tS003\tBook of the New Sun\tUSA\tGene Wolfe\t\t[Shadow of the Torturer|Claw of the Conciliator]\t[|6.99]\t[S003.1|S003.2]\t[scifi|fantasy]\r",
323+
"[scifi|fantasy]\tS003\tBook of the New Sun\tUSA\tGene Wolfe\t[Shadow of the Torturer|Claw of the Conciliator]\t[|6.99]\t[S003.1|S003.2]\t\t[scifi|fantasy]\r",
327324
"\r\n",
328-
"\tS004\tExample with single book\tUSA\tMs Writer\t\t[Blah]\t\t[S004.1]\t[romance]\r",
325+
"\tS004\tExample with single book\tUSA\tMs Writer\t[Blah]\t\t[S004.1]\t\t[romance]\r",
329326
"\r\n",
330327
"\tS005\tExample with no books\tUSA\tMr Unproductive\t\t\t\t\t[romance|scifi|fantasy]\r",
331328
"\r\n"
@@ -348,7 +345,7 @@
348345
},
349346
{
350347
"cell_type": "code",
351-
"execution_count": 7,
348+
"execution_count": 6,
352349
"id": "8fe7ed12",
353350
"metadata": {},
354351
"outputs": [],
@@ -358,7 +355,7 @@
358355
},
359356
{
360357
"cell_type": "code",
361-
"execution_count": 8,
358+
"execution_count": 7,
362359
"id": "74984e70",
363360
"metadata": {},
364361
"outputs": [
@@ -388,10 +385,10 @@
388385
" <th>name</th>\n",
389386
" <th>creator_from_country</th>\n",
390387
" <th>creator_name</th>\n",
391-
" <th>books_summary</th>\n",
392388
" <th>books_name</th>\n",
393389
" <th>books_price</th>\n",
394390
" <th>books_id</th>\n",
391+
" <th>books_summary</th>\n",
395392
" <th>creator_genres</th>\n",
396393
" </tr>\n",
397394
" </thead>\n",
@@ -403,10 +400,10 @@
403400
" <td>Lord of the Rings</td>\n",
404401
" <td>England</td>\n",
405402
" <td>JRR Tolkein</td>\n",
406-
" <td>[Hobbits|More hobbits|Yet more hobbits]</td>\n",
407403
" <td>[Fellowship of the Ring|The Two Towers|Return ...</td>\n",
408404
" <td>[5.99|5.99|6.99]</td>\n",
409405
" <td>[S001.1|S001.2|S001.3]</td>\n",
406+
" <td>[Hobbits|More hobbits|Yet more hobbits]</td>\n",
410407
" <td>NaN</td>\n",
411408
" </tr>\n",
412409
" <tr>\n",
@@ -416,11 +413,11 @@
416413
" <td>The Culture Series</td>\n",
417414
" <td>Scotland</td>\n",
418415
" <td>Ian M Banks</td>\n",
419-
" <td>NaN</td>\n",
420416
" <td>[Consider Phlebas|Player of Games]</td>\n",
421417
" <td>[5.99|5.99]</td>\n",
422418
" <td>[S002.1|S002.2]</td>\n",
423419
" <td>NaN</td>\n",
420+
" <td>NaN</td>\n",
424421
" </tr>\n",
425422
" <tr>\n",
426423
" <th>2</th>\n",
@@ -429,10 +426,10 @@
429426
" <td>Book of the New Sun</td>\n",
430427
" <td>USA</td>\n",
431428
" <td>Gene Wolfe</td>\n",
432-
" <td>NaN</td>\n",
433429
" <td>[Shadow of the Torturer|Claw of the Conciliator]</td>\n",
434430
" <td>[|6.99]</td>\n",
435431
" <td>[S003.1|S003.2]</td>\n",
432+
" <td>NaN</td>\n",
436433
" <td>[scifi|fantasy]</td>\n",
437434
" </tr>\n",
438435
" <tr>\n",
@@ -442,10 +439,10 @@
442439
" <td>Example with single book</td>\n",
443440
" <td>USA</td>\n",
444441
" <td>Ms Writer</td>\n",
445-
" <td>NaN</td>\n",
446442
" <td>[Blah]</td>\n",
447443
" <td>NaN</td>\n",
448444
" <td>[S004.1]</td>\n",
445+
" <td>NaN</td>\n",
449446
" <td>[romance]</td>\n",
450447
" </tr>\n",
451448
" <tr>\n",
@@ -473,29 +470,29 @@
473470
"3 NaN S004 Example with single book USA \n",
474471
"4 NaN S005 Example with no books USA \n",
475472
"\n",
476-
" creator_name books_summary \\\n",
477-
"0 JRR Tolkein [Hobbits|More hobbits|Yet more hobbits] \n",
478-
"1 Ian M Banks NaN \n",
479-
"2 Gene Wolfe NaN \n",
480-
"3 Ms Writer NaN \n",
481-
"4 Mr Unproductive NaN \n",
473+
" creator_name books_name \\\n",
474+
"0 JRR Tolkein [Fellowship of the Ring|The Two Towers|Return ... \n",
475+
"1 Ian M Banks [Consider Phlebas|Player of Games] \n",
476+
"2 Gene Wolfe [Shadow of the Torturer|Claw of the Conciliator] \n",
477+
"3 Ms Writer [Blah] \n",
478+
"4 Mr Unproductive NaN \n",
482479
"\n",
483-
" books_name books_price \\\n",
484-
"0 [Fellowship of the Ring|The Two Towers|Return ... [5.99|5.99|6.99] \n",
485-
"1 [Consider Phlebas|Player of Games] [5.99|5.99] \n",
486-
"2 [Shadow of the Torturer|Claw of the Conciliator] [|6.99] \n",
487-
"3 [Blah] NaN \n",
488-
"4 NaN NaN \n",
480+
" books_price books_id \\\n",
481+
"0 [5.99|5.99|6.99] [S001.1|S001.2|S001.3] \n",
482+
"1 [5.99|5.99] [S002.1|S002.2] \n",
483+
"2 [|6.99] [S003.1|S003.2] \n",
484+
"3 NaN [S004.1] \n",
485+
"4 NaN NaN \n",
489486
"\n",
490-
" books_id creator_genres \n",
491-
"0 [S001.1|S001.2|S001.3] NaN \n",
492-
"1 [S002.1|S002.2] NaN \n",
493-
"2 [S003.1|S003.2] [scifi|fantasy] \n",
494-
"3 [S004.1] [romance] \n",
495-
"4 NaN [romance|scifi|fantasy] "
487+
" books_summary creator_genres \n",
488+
"0 [Hobbits|More hobbits|Yet more hobbits] NaN \n",
489+
"1 NaN NaN \n",
490+
"2 NaN [scifi|fantasy] \n",
491+
"3 NaN [romance] \n",
492+
"4 NaN [romance|scifi|fantasy] "
496493
]
497494
},
498-
"execution_count": 8,
495+
"execution_count": 7,
499496
"metadata": {},
500497
"output_type": "execute_result"
501498
}
@@ -520,7 +517,7 @@
520517
},
521518
{
522519
"cell_type": "code",
523-
"execution_count": 9,
520+
"execution_count": 8,
524521
"id": "3479e425",
525522
"metadata": {},
526523
"outputs": [
@@ -539,7 +536,7 @@
539536
},
540537
{
541538
"cell_type": "code",
542-
"execution_count": 10,
539+
"execution_count": 9,
543540
"id": "de1bc259",
544541
"metadata": {},
545542
"outputs": [
@@ -600,7 +597,7 @@
600597
},
601598
{
602599
"cell_type": "code",
603-
"execution_count": 11,
600+
"execution_count": 10,
604601
"id": "7abc7bfd",
605602
"metadata": {},
606603
"outputs": [
@@ -618,18 +615,15 @@
618615
" -i, --input PATH Input file, e.g. a SSSOM tsv file. [required]\r\n",
619616
" -I, --input-format TEXT The string denoting the input format, e.g.\r\n",
620617
" tsv,csv,yaml,json\r\n",
621-
"\r\n",
622618
" -o, --output TEXT Output file, e.g. a SSSOM tsv file.\r\n",
623619
" -t, --output-format TEXT Desired output format, e.g. tsv,csv,yaml,json\r\n",
624620
" -L, --multivalued-keys TEXT List of keys that are multivalued\r\n",
625621
" -F, --flatten-keys TEXT List of keys that are to be flattened\r\n",
626622
" -s, --serializer TEXT Serializer to use for complex keys\r\n",
627623
" -S, --serialized-keys TEXT List of keys that are to be serialized using\r\n",
628624
" the serializer\r\n",
629-
"\r\n",
630625
" -C, --config-key TEXT Key configuration. Must be of form\r\n",
631626
" KEY={yaml,json,flat,multivalued}*\r\n",
632-
"\r\n",
633627
" -c, --load-config TEXT Path to global configuration file to be loaded\r\n",
634628
" -k, --key TEXT Key in root object to be used.\r\n",
635629
" --help Show this message and exit.\r\n"
@@ -642,7 +636,7 @@
642636
},
643637
{
644638
"cell_type": "code",
645-
"execution_count": 12,
639+
"execution_count": 11,
646640
"id": "e0bd52be",
647641
"metadata": {},
648642
"outputs": [],
@@ -653,7 +647,7 @@
653647
},
654648
{
655649
"cell_type": "code",
656-
"execution_count": 13,
650+
"execution_count": 12,
657651
"id": "ced1c890",
658652
"metadata": {},
659653
"outputs": [

json_flattener/flattener.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,6 @@ def unflatten(objs: List[ROW], config: GlobalConfig = GlobalConfig()) -> List[OB
219219
if serializer == Serializer.yaml:
220220
nu_obj = yaml.safe_load(serialized_v)
221221
elif serializer == Serializer.json:
222-
print(f'Loading: {injected_field} in {obj}')
223222
nu_obj = json.loads(serialized_v)
224223
elif serializer == Serializer.pickle:
225224
nu_obj = pickle.loads(serialized_v)
@@ -338,6 +337,15 @@ def _getval(x: str) -> Optional[Any]:
338337
except ValueError:
339338
return x
340339

340+
# Collect the injected field name of every configured serializer so those keys are never parsed as direct lists below
341+
serialized_fields = set()
342+
gconfig = config.key_configs
343+
for field, kconfig in gconfig.items():
344+
serializers = kconfig.serializers
345+
for serializer in serializers:
346+
injected_field = _serialized_field_name(field, config.sep, serializer)
347+
serialized_fields.add(injected_field)
348+
341349
objs = []
342350
for row in r:
343351
nu_obj = {}
@@ -348,11 +356,11 @@ def _getval(x: str) -> Optional[Any]:
348356
is_direct_list = False
349357
if key_config is not None and key_config.is_list:
350358
is_direct_list = True
351-
if key_config.serializers is not None and len(key_config.serializers) > 0:
352-
is_direct_list = False
353359
if not is_direct_list:
354360
if lo != '' and lc != '' and v.startswith(lo) and v.endswith(lc):
355361
is_direct_list = True
362+
if k in serialized_fields:
363+
is_direct_list = False
356364
#if (lo != '' or lc != '') and v.startswith(lo) and v.endswith(lc) and not k.endswith('_json') and not k.endswith('_yaml'):
357365
if is_direct_list:
358366
if lo != '':

setup.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
REQUIRES_PYTHON = '>=3.7.0'
1010
LICENSE = 'BSD'
1111

12+
VERSION = '0.1.7'
1213
#version = pattern.search(text).group(1)
1314

1415
#with open("requirements.txt", "r") as FH:
@@ -20,6 +21,7 @@
2021
name=NAME,
2122
author=AUTHOR,
2223
author_email=EMAIL,
24+
version=VERSION,
2325
python_requires=REQUIRES_PYTHON,
2426
url=URL,
2527
description=DESCRIPTION,
@@ -42,6 +44,6 @@
4244
entry_points={
4345
'console_scripts': ['jfl=json_flattener.cli:main']
4446
},
45-
setup_requires=['pbr'],
46-
pbr=True,
47+
#setup_requires=['pbr'],
48+
#pbr=True,
4749
)

tests/test_flattener.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import yaml
55
import io
6+
import logging
67
from json_flattener import flatten, unflatten, KeyConfig, GlobalConfig, Serializer, flatten_to_csv, unflatten_from_csv
78

89
ROOT = os.path.abspath(os.path.dirname(__file__))
@@ -14,21 +15,24 @@ def _json(obj) -> str:
1415

1516

1617
def _roundtrip_to_tsv(objs, config=None, **params):
18+
"""
19+
Convert JSON objects to TSV and back again, asserting that the round trip reproduces the input
20+
"""
1721
output = io.StringIO()
1822
flatten_to_csv(objs, output, config=config, **params)
19-
print(f'CONFIG:')
23+
#print(f'CONFIG:')
2024
config_dict = config.as_dict()
21-
print(_json(config_dict))
25+
#print(_json(config_dict))
2226
config2 = GlobalConfig.from_dict(**config_dict)
2327
print(f'C2 = {config2}')
2428
print('AS TSV')
2529
print(output.getvalue())
2630
inp = io.StringIO(output.getvalue())
2731
objs2 = unflatten_from_csv(inp, config=config, **params)
28-
print('BACK FROM TSV')
29-
print(_json(objs2))
30-
print('ORIG')
31-
print(_json(objs))
32+
logging.info('BACK FROM TSV')
33+
logging.info(_json(objs2))
34+
logging.info('ORIG')
35+
logging.info(_json(objs))
3236
assert objs == objs2
3337

3438
class FlattenerCase(unittest.TestCase):
@@ -237,6 +241,27 @@ def test_flattener(self):
237241
assert 'subject' not in obj
238242
assert 'object' not in obj
239243

244+
def test_lists(self):
245+
obj = {
246+
"id": "X1",
247+
"my_list": [
248+
{"x": "foo", "y": 2},
249+
{"x": "bar", "y": 3},
250+
]
251+
}
252+
key_config = {"my_list": KeyConfig(delete=True, flatten=True, is_list=True, serializers=[Serializer.json])}
253+
global_config = GlobalConfig(key_configs=key_config)
254+
_roundtrip_to_tsv([obj], global_config)
255+
key_config = {"my_list": KeyConfig(delete=True, flatten=False, is_list=True, serializers=[Serializer.json])}
256+
global_config = GlobalConfig(key_configs=key_config)
257+
_roundtrip_to_tsv([obj], global_config)
258+
key_config = {"my_list": KeyConfig(delete=False, flatten=True, is_list=True, serializers=[Serializer.json])}
259+
global_config = GlobalConfig(key_configs=key_config)
260+
_roundtrip_to_tsv([obj], global_config)
261+
key_config = {"my_list": KeyConfig(delete=False, flatten=False, is_list=True, serializers=[Serializer.json])}
262+
global_config = GlobalConfig(key_configs=key_config)
263+
_roundtrip_to_tsv([obj], global_config)
264+
240265
def test_nulls(self):
241266

242267
dict = {

0 commit comments

Comments
 (0)