-
Notifications
You must be signed in to change notification settings - Fork 6
/
nep-0001-npy-format.html
839 lines (650 loc) · 40 KB
/
nep-0001-npy-format.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
<!DOCTYPE html>
<html lang="en" data-content_root="./" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>NEP 1 — A simple file format for NumPy arrays — NumPy Enhancement Proposals</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
this give us a css class that will be invisible only if js is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="_static/styles/theme.css?digest=26a4bc78f4c0ddb94549" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=26a4bc78f4c0ddb94549" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=fa44fd50" />
<!-- So that users can add custom icons -->
<script src="_static/scripts/fontawesome.js?digest=26a4bc78f4c0ddb94549"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=26a4bc78f4c0ddb94549" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=26a4bc78f4c0ddb94549" />
<script src="_static/documentation_options.js?v=7f41d439"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'nep-0001-npy-format';</script>
<link rel="icon" href="_static/favicon.ico"/>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="NEP 5 — Generalized universal functions" href="nep-0005-generalized-ufuncs.html" />
<link rel="prev" title="Finished NEPs" href="finished.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="" />
<meta name="docbuild:last-update" content="Nov 19, 2024"/>
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="content.html">
<img src="_static/numpylogo.svg" class="logo__image only-light" alt="NumPy Enhancement Proposals - Home"/>
<img src="_static/numpylogo.svg" class="logo__image only-dark pst-js-only" alt="NumPy Enhancement Proposals - Home"/>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Index
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="scope.html">
The Scope of NumPy
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="roadmap.html">
Current roadmap
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22">
Wish list
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22">
Wishlist
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/numpy/numpy" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Index
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="scope.html">
The Scope of NumPy
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="roadmap.html">
Current roadmap
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22">
Wish list
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22">
Wishlist
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/numpy/numpy" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="scope.html">The Scope of NumPy</a></li>
<li class="toctree-l1"><a class="reference internal" href="roadmap.html">Current roadmap</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22">Wish list</a></li>
</ul>
<ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="meta.html">Meta-NEPs (NEPs about NEPs or active Processes)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="nep-0000.html">NEP 0 — Purpose and process</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0023-backwards-compatibility.html">NEP 23 — Backwards compatibility and deprecation policy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0036-fair-play.html">NEP 36 — Fair play</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0045-c_style_guide.html">NEP 45 — C style guide</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0046-sponsorship-guidelines.html">NEP 46 — NumPy sponsorship guidelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0048-spending-project-funds.html">NEP 48 — Spending NumPy project funds</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-template.html">NEP X — Template and instructions</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="provisional.html">Provisional NEPs (provisionally accepted; interface may change)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="simple">
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="accepted.html">Accepted NEPs (implementation in progress)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="nep-0041-improved-dtype-support.html">NEP 41 — First step towards a new datatype system</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0042-new-dtypes.html">NEP 42 — New and extensible DTypes</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0044-restructuring-numpy-docs.html">NEP 44 — Restructuring the NumPy documentation</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0051-scalar-representation.html">NEP 51 — Changing the representation of NumPy scalars</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="open.html">Open NEPs (under consideration)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="nep-0043-extensible-ufuncs.html">NEP 43 — Enhancing the extensibility of UFuncs</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0053-c-abi-evolution.html">NEP 53 — Evolving the NumPy C-API for NumPy 2.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0054-simd-cpp-highway.html">NEP 54 — SIMD infrastructure evolution: adopting Google Highway when moving to C++?</a></li>
</ul>
</details></li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="finished.html">Finished NEPs</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
<li class="toctree-l2 current active"><a class="current reference internal" href="#">NEP 1 — A simple file format for NumPy arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0005-generalized-ufuncs.html">NEP 5 — Generalized universal functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0007-datetime-proposal.html">NEP 7 — A proposal for implementing some date/time types in NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0010-new-iterator-ufunc.html">NEP 10 — Optimizing iterator/UFunc performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0013-ufunc-overrides.html">NEP 13 — A mechanism for overriding Ufuncs</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0014-dropping-python2.7-proposal.html">NEP 14 — Plan for dropping Python 2.7 support</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0015-merge-multiarray-umath.html">NEP 15 — Merging multiarray and umath</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0018-array-function-protocol.html">NEP 18 — A dispatch mechanism for NumPy's high level array functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0019-rng-policy.html">NEP 19 — Random number generator policy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0020-gufunc-signature-enhancement.html">NEP 20 — Expansion of generalized universal function signatures</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0022-ndarray-duck-typing-overview.html">NEP 22 — Duck typing for NumPy arrays – high level overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0027-zero-rank-arrarys.html">NEP 27 — Zero rank arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0028-website-redesign.html">NEP 28 — numpy.org website redesign</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0029-deprecation_policy.html">NEP 29 — Recommend Python and NumPy version support as a community policy standard</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0032-remove-financial-functions.html">NEP 32 — Remove the financial functions from NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0034-infer-dtype-is-object.html">NEP 34 — Disallow inferring ``dtype=object`` from sequences</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0035-array-creation-dispatch-with-array-function.html">NEP 35 — Array creation dispatching with __array_function__</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0038-SIMD-optimizations.html">NEP 38 — Using SIMD optimization instructions for performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0040-legacy-datatype-impl.html">NEP 40 — Legacy datatype implementation in NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0049.html">NEP 49 — Data allocation strategies</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0050-scalar-promotion.html">NEP 50 — Promotion rules for Python scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0052-python-api-cleanup.html">NEP 52 — Python API cleanup for NumPy 2.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0055-string_dtype.html">NEP 55 — Add a UTF-8 variable-width string DType to NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0056-array-api-main-namespace.html">NEP 56 — Array API standard support in NumPy's main namespace</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="deferred.html">Deferred and Superseded NEPs</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="nep-0002-warnfix.html">NEP 2 — A proposal to build numpy without warning with a big set of warning flags</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0003-math_config_clean.html">NEP 3 — Cleaning the math configuration of numpy.core</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0004-datetime-proposal3.html">NEP 4 — A (third) proposal for implementing some date/time types in NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0006-newbugtracker.html">NEP 6 — Replacing Trac with a different bug tracker</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0008-groupby_additions.html">NEP 8 — A proposal for adding groupby functionality to NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0009-structured_array_extensions.html">NEP 9 — Structured array extensions</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0011-deferred-ufunc-evaluation.html">NEP 11 — Deferred UFunc evaluation</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0012-missing-data.html">NEP 12 — Missing data functionality in NumPy</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0021-advanced-indexing.html">NEP 21 — Simplified and explicit advanced indexing</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0024-missing-data-2.html">NEP 24 — Missing data functionality - alternative 1 to NEP 12</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0025-missing-data-3.html">NEP 25 — NA support via special dtypes</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0026-missing-data-summary.html">NEP 26 — Summary of missing data NEPs and discussion</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0030-duck-array-protocol.html">NEP 30 — Duck typing for NumPy arrays - implementation</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0031-uarray.html">NEP 31 — Context-local and global overrides of the NumPy API</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0037-array-module.html">NEP 37 — A dispatch protocol for NumPy-like modules</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0047-array-api-standard.html">NEP 47 — Adopting the array API standard</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="rejected.html">Rejected and Withdrawn NEPs</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="nep-0016-abstract-array.html">NEP 16 — An abstract base class for identifying "duck arrays"</a></li>
<li class="toctree-l2"><a class="reference internal" href="nep-0017-split-out-maskedarray.html">NEP 17 — Split out masked arrays</a></li>
</ul>
</details></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="content.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Roadmap & NumPy enhancement proposals</a></li>
<li class="breadcrumb-item"><a href="finished.html" class="nav-link">Finished NEPs</a></li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">NEP 1 — A simple file format for NumPy arrays</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="nep-1-a-simple-file-format-for-numpy-arrays">
<span id="nep01"></span><h1>NEP 1 — A simple file format for NumPy arrays<a class="headerlink" href="#nep-1-a-simple-file-format-for-numpy-arrays" title="Link to this heading">#</a></h1>
<dl class="field-list simple">
<dt class="field-odd">Author<span class="colon">:</span></dt>
<dd class="field-odd"><p>Robert Kern <<a class="reference external" href="mailto:robert.kern%40gmail.com">robert<span>.</span>kern<span>@</span>gmail<span>.</span>com</a>></p>
</dd>
<dt class="field-even">Status<span class="colon">:</span></dt>
<dd class="field-even"><p>Final</p>
</dd>
<dt class="field-odd">Created<span class="colon">:</span></dt>
<dd class="field-odd"><p>20-Dec-2007</p>
</dd>
</dl>
<section id="abstract">
<h2>Abstract<a class="headerlink" href="#abstract" title="Link to this heading">#</a></h2>
<p>We propose a standard binary file format (NPY) for persisting
a single arbitrary NumPy array on disk. The format stores all of
the shape and dtype information necessary to reconstruct the array
correctly even on another machine with a different architecture.
The format is designed to be as simple as possible while achieving
its limited goals. The implementation is intended to be pure
Python and distributed as part of the main numpy package.</p>
</section>
<section id="rationale">
<h2>Rationale<a class="headerlink" href="#rationale" title="Link to this heading">#</a></h2>
<p>A lightweight, omnipresent system for saving NumPy arrays to disk
is a frequent need. Python in general has pickle [1] for saving
most Python objects to disk. This often works well enough with
NumPy arrays for many purposes, but it has a few drawbacks:</p>
<ul class="simple">
<li><p>Dumping or loading a pickle file require the duplication of the
data in memory. For large arrays, this can be a showstopper.</p></li>
<li><p>The array data is not directly accessible through
memory-mapping. Now that numpy has that capability, it has
proved very useful for loading large amounts of data (or more to
the point: avoiding loading large amounts of data when you only
need a small part).</p></li>
</ul>
<p>Both of these problems can be addressed by dumping the raw bytes
to disk using ndarray.tofile() and numpy.fromfile(). However,
these have their own problems:</p>
<ul class="simple">
<li><p>The data which is written has no information about the shape or
dtype of the array.</p></li>
<li><p>It is incapable of handling object arrays.</p></li>
</ul>
<p>The NPY file format is an evolutionary advance over these two
approaches. Its design is mostly limited to solving the problems
with pickles and tofile()/fromfile(). It does not intend to solve
more complicated problems for which more complicated formats like
HDF5 [2] are a better solution.</p>
</section>
<section id="use-cases">
<h2>Use cases<a class="headerlink" href="#use-cases" title="Link to this heading">#</a></h2>
<ul class="simple">
<li><p>Neville Newbie has just started to pick up Python and NumPy. He
has not installed many packages, yet, nor learned the standard
library, but he has been playing with NumPy at the interactive
prompt to do small tasks. He gets a result that he wants to
save.</p></li>
<li><p>Annie Analyst has been using large nested record arrays to
represent her statistical data. She wants to convince her
R-using colleague, David Doubter, that Python and NumPy are
awesome by sending him her analysis code and data. She needs
the data to load at interactive speeds. Since David does not
use Python usually, needing to install large packages would turn
him off.</p></li>
<li><p>Simon Seismologist is developing new seismic processing tools.
One of his algorithms requires large amounts of intermediate
data to be written to disk. The data does not really fit into
the industry-standard SEG-Y schema, but he already has a nice
record-array dtype for using it internally.</p></li>
<li><p>Polly Parallel wants to split up a computation on her multicore
machine as simply as possible. Parts of the computation can be
split up among different processes without any communication
between processes; they just need to fill in the appropriate
portion of a large array with their results. Having several
child processes memory-mapping a common array is a good way to
achieve this.</p></li>
</ul>
</section>
<section id="requirements">
<h2>Requirements<a class="headerlink" href="#requirements" title="Link to this heading">#</a></h2>
<p>The format MUST be able to:</p>
<ul class="simple">
<li><p>Represent all NumPy arrays including nested record
arrays and object arrays.</p></li>
<li><p>Represent the data in its native binary form.</p></li>
<li><p>Be contained in a single file.</p></li>
<li><p>Support Fortran-contiguous arrays directly.</p></li>
<li><p>Store all of the necessary information to reconstruct the array
including shape and dtype on a machine of a different
architecture. Both little-endian and big-endian arrays must be
supported and a file with little-endian numbers will yield
a little-endian array on any machine reading the file. The
types must be described in terms of their actual sizes. For
example, if a machine with a 64-bit C “long int” writes out an
array with “long ints”, a reading machine with 32-bit C “long
ints” will yield an array with 64-bit integers.</p></li>
<li><p>Be reverse engineered. Datasets often live longer than the
programs that created them. A competent developer should be
able to create a solution in his preferred programming language to
read most NPY files that he has been given without much
documentation.</p></li>
<li><p>Allow memory-mapping of the data.</p></li>
<li><p>Be read from a filelike stream object instead of an actual file.
This allows the implementation to be tested easily and makes the
system more flexible. NPY files can be stored in ZIP files and
easily read from a ZipFile object.</p></li>
<li><p>Store object arrays. Since general Python objects are
complicated and can only be reliably serialized by pickle (if at
all), many of the other requirements are waived for files
containing object arrays. Files with object arrays do not have
to be mmapable since that would be technically impossible. We
cannot expect the pickle format to be reverse engineered without
knowledge of pickle. However, one should at least be able to
read and write object arrays with the same generic interface as
other arrays.</p></li>
<li><p>Be read and written using APIs provided in the numpy package
itself without any other libraries. The implementation inside
numpy may be in C if necessary.</p></li>
</ul>
<p>The format explicitly <em>does not</em> need to:</p>
<ul class="simple">
<li><p>Support multiple arrays in a file. Since we require filelike
objects to be supported, one could use the API to build an ad
hoc format that supported multiple arrays. However, solving the
general problem and use cases is beyond the scope of the format
and the API for numpy.</p></li>
<li><p>Fully handle arbitrary subclasses of numpy.ndarray. Subclasses
will be accepted for writing, but only the array data will be
written out. A regular numpy.ndarray object will be created
upon reading the file. The API can be used to build a format
for a particular subclass, but that is out of scope for the
general NPY format.</p></li>
</ul>
</section>
<section id="format-specification-version-1-0">
<h2>Format specification: version 1.0<a class="headerlink" href="#format-specification-version-1-0" title="Link to this heading">#</a></h2>
<p>The first 6 bytes are a magic string: exactly “x93NUMPY”.</p>
<p>The next 1 byte is an unsigned byte: the major version number of
the file format, e.g. x01.</p>
<p>The next 1 byte is an unsigned byte: the minor version number of
the file format, e.g. x00. Note: the version of the file format
is not tied to the version of the numpy package.</p>
<p>The next 2 bytes form a little-endian unsigned short int: the
length of the header data HEADER_LEN.</p>
<p>The next HEADER_LEN bytes form the header data describing the
array’s format. It is an ASCII string which contains a Python
literal expression of a dictionary. It is terminated by a newline
(’n’) and padded with spaces (’x20’) to make the total length of
the magic string + 4 + HEADER_LEN be evenly divisible by 16 for
alignment purposes.</p>
<p>The dictionary contains three keys:</p>
<blockquote>
<div><dl class="simple">
<dt>“descr”<span class="classifier">dtype.descr</span></dt><dd><p>An object that can be passed as an argument to the
numpy.dtype() constructor to create the array’s dtype.</p>
</dd>
<dt>“fortran_order”<span class="classifier">bool</span></dt><dd><p>Whether the array data is Fortran-contiguous or not.
Since Fortran-contiguous arrays are a common form of
non-C-contiguity, we allow them to be written directly to
disk for efficiency.</p>
</dd>
<dt>“shape”<span class="classifier">tuple of int</span></dt><dd><p>The shape of the array.</p>
</dd>
</dl>
</div></blockquote>
<p>For repeatability and readability, this dictionary is formatted
using pprint.pformat() so the keys are in alphabetic order.</p>
<p>Following the header comes the array data. If the dtype contains
Python objects (i.e. dtype.hasobject is True), then the data is
a Python pickle of the array. Otherwise the data is the
contiguous (either C- or Fortran-, depending on fortran_order)
bytes of the array. Consumers can figure out the number of bytes
by multiplying the number of elements given by the shape (noting
that shape=() means there is 1 element) by dtype.itemsize.</p>
</section>
<section id="format-specification-version-2-0">
<h2>Format specification: version 2.0<a class="headerlink" href="#format-specification-version-2-0" title="Link to this heading">#</a></h2>
<p>The version 1.0 format only allowed the array header to have a
total size of 65535 bytes. This can be exceeded by structured
arrays with a large number of columns. The version 2.0 format
extends the header size to 4 GiB. <cite>numpy.save</cite> will automatically
save in 2.0 format if the data requires it, else it will always use
the more compatible 1.0 format.</p>
<p>The description of the fourth element of the header therefore has
become:</p>
<blockquote>
<div><p>The next 4 bytes form a little-endian unsigned int: the length
of the header data HEADER_LEN.</p>
</div></blockquote>
</section>
<section id="conventions">
<h2>Conventions<a class="headerlink" href="#conventions" title="Link to this heading">#</a></h2>
<p>We recommend using the “.npy” extension for files following this
format. This is by no means a requirement; applications may wish
to use this file format but use an extension specific to the
application. In the absence of an obvious alternative, however,
we suggest using “.npy”.</p>
<p>For a simple way to combine multiple arrays into a single file,
one can use ZipFile to contain multiple “.npy” files. We
recommend using the file extension “.npz” for these archives.</p>
</section>
<section id="alternatives">
<h2>Alternatives<a class="headerlink" href="#alternatives" title="Link to this heading">#</a></h2>
<p>The author believes that this system (or one along these lines) is
about the simplest system that satisfies all of the requirements.
However, one must always be wary of introducing a new binary
format to the world.</p>
<p>HDF5 [2] is a very flexible format that should be able to
represent all of NumPy’s arrays in some fashion. It is probably
the only widely-used format that can faithfully represent all of
NumPy’s array features. It has seen substantial adoption by the
scientific community in general and the NumPy community in
particular. It is an excellent solution for a wide variety of
array storage problems with or without NumPy.</p>
<p>HDF5 is a complicated format that more or less implements
a hierarchical filesystem-in-a-file. This fact makes satisfying
some of the Requirements difficult. To the author’s knowledge, as
of this writing, there is no application or library that reads or
writes even a subset of HDF5 files that does not use the canonical
libhdf5 implementation. This implementation is a large library
that is not always easy to build. It would be infeasible to
include it in numpy.</p>
<p>It might be feasible to target an extremely limited subset of
HDF5. Namely, there would be only one object in it: the array.
Using contiguous storage for the data, one should be able to
implement just enough of the format to provide the same metadata
that the proposed format does. One could still meet all of the
technical requirements like mmapability.</p>
<p>We would accrue a substantial benefit by being able to generate
files that could be read by other HDF5 software. Furthermore, by
providing the first non-libhdf5 implementation of HDF5, we would
be able to encourage more adoption of simple HDF5 in applications
where it was previously infeasible because of the size of the
library. The basic work may encourage similar dead-simple
implementations in other languages and further expand the
community.</p>
<p>The remaining concern is about reverse engineerability of the
format. Even the simple subset of HDF5 would be very difficult to
reverse engineer given just a file by itself. However, given the
prominence of HDF5, this might not be a substantial concern.</p>
<p>In conclusion, we are going forward with the design laid out in
this document. If someone writes code to handle the simple subset
of HDF5 that would be useful to us, we may consider a revision of
the file format.</p>
</section>
<section id="implementation">
<h2>Implementation<a class="headerlink" href="#implementation" title="Link to this heading">#</a></h2>
<p>The version 1.0 implementation was first included in the 1.0.5 release of
numpy, and remains available. The version 2.0 implementation was first
included in the 1.9.0 release of numpy.</p>
<p>Specifically, the file format.py in this directory implements the
format as described here.</p>
<blockquote>
<div><p><a class="github reference external" href="https://github.com/numpy/numpy/blob/main/numpy/lib/format.py">numpy/numpy</a></p>
</div></blockquote>
</section>
<section id="references">
<h2>References<a class="headerlink" href="#references" title="Link to this heading">#</a></h2>
<p>[1] <a class="reference external" href="https://docs.python.org/library/pickle.html">https://docs.python.org/library/pickle.html</a></p>
<p>[2] <a class="reference external" href="https://support.hdfgroup.org/HDF5/">https://support.hdfgroup.org/HDF5/</a></p>
</section>
<section id="copyright">
<h2>Copyright<a class="headerlink" href="#copyright" title="Link to this heading">#</a></h2>
<p>This document has been placed in the public domain.</p>
</section>
</section>
</article>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rationale">Rationale</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#use-cases">Use cases</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#requirements">Requirements</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#format-specification-version-1-0">Format specification: version 1.0</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#format-specification-version-2-0">Format specification: version 2.0</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conventions">Conventions</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#alternatives">Alternatives</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#references">References</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#copyright">Copyright</a></li>
</ul>
</nav></div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="_static/scripts/bootstrap.js?digest=26a4bc78f4c0ddb94549"></script>
<script defer src="_static/scripts/pydata-sphinx-theme.js?digest=26a4bc78f4c0ddb94549"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2017-2024, NumPy Developers.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.2.6.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.0.
</p></div>
</div>
</div>
</footer>
</body>
</html>