Skip to content

Commit 45e478a

Browse files
committed
BLD: pandas imports. A lot of stuff is still broken, though
1 parent 7b4492d commit 45e478a

26 files changed

+434
-380
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
clean:
22
-rm -rf build dist
33

4-
tseries: pandas/src/tseries.pyx
4+
tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx
55
python setup.py build_ext --inplace
66

77
sparse: pandas/src/sparse.pyx

pandas/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
import numpy as np
88

99
try:
10-
import pandas.lib as lib
10+
import hashtable
11+
import tslib
12+
import lib
1113
except Exception: # pragma: no cover
1214
import sys
1315
e = sys.exc_info()[1] # Py25 and Py3 current exception syntax conflict

pandas/src/stats.pyx renamed to pandas/algos.pyx

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from numpy cimport *
22
cimport numpy as np
3+
import numpy as np
34

45
cimport cython
56

@@ -1707,6 +1708,83 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
17071708
#----------------------------------------------------------------------
17081709
# group operations
17091710

1711+
@cython.boundscheck(False)
def groupby_indices(ndarray values):
    """
    Build a mapping {group key -> int64 ndarray of positions} for `values`.

    Uses group_labels() to assign an integer label per distinct value, then
    does a second pass writing each row's position into the preallocated
    array for its group. Rows whose value is NaN (label == -1) are skipped
    and appear in no group.

    Returns
    -------
    dict
        group key -> ndarray[int64] of the positions at which that key occurs,
        in ascending order.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] labels, counts, arr, seen
        int64_t loc
        dict ids = {}
        object val
        int64_t k

    # ids: label -> original value; counts: occurrences per label
    ids, labels, counts = group_labels(values)
    # seen[k] = how many positions have been written for group k so far
    seen = np.zeros_like(counts)

    # try not to get in trouble here...
    # vecs[k] aliases the raw data buffer of result[ids[k]]; the `result`
    # dict keeps each ndarray alive while we write through the C pointers.
    # NOTE(review): malloc return is not checked for NULL, and an exception
    # raised before free() (e.g. in np.empty) would leak vecs — confirm
    # whether that is acceptable here.
    cdef int64_t **vecs = <int64_t **> malloc(len(ids) * sizeof(int64_t*))
    result = {}
    for i from 0 <= i < len(counts):
        # exact-size output array per group, filled via vecs below
        arr = np.empty(counts[i], dtype=np.int64)
        result[ids[i]] = arr
        vecs[i] = <int64_t *> arr.data

    for i from 0 <= i < n:
        k = labels[i]

        # was NaN
        if k == -1:
            continue

        # append position i to group k's array
        loc = seen[k]
        vecs[k][loc] = i
        seen[k] = loc + 1

    free(vecs)

    return result
1746+
1747+
@cython.wraparound(False)
@cython.boundscheck(False)
def group_labels(ndarray[object] values):
    '''
    Compute label vector from input values and associated useful data

    Labels are assigned in order of first appearance; NaN values (detected
    via val != val) get label -1 and are excluded from the counts.

    Returns
    -------
    reverse : dict
        label (int) -> original value
    labels : ndarray[int64]
        label per input position (-1 for NaN)
    counts : ndarray[int64]
        occurrences per label, length == number of distinct non-NaN values
    '''
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] labels = np.empty(n, dtype=np.int64)
        # counts is oversized to n (worst case: all values distinct) and
        # trimmed to the actual group count on return
        ndarray[int64_t] counts = np.empty(n, dtype=np.int64)
        dict ids = {}, reverse = {}
        int64_t idx
        object val
        int64_t count = 0

    for i from 0 <= i < n:
        val = values[i]

        # is NaN
        if val != val:
            labels[i] = -1
            continue

        # for large number of groups, not doing try: except: makes a big
        # difference
        if val in ids:
            idx = ids[val]
            labels[i] = idx
            counts[idx] = counts[idx] + 1
        else:
            ids[val] = count
            reverse[count] = val
            labels[i] = count
            counts[count] = 1
            count += 1

    # .copy() so the returned counts does not keep the oversized buffer alive
    return reverse, labels, counts[:count].copy()
1787+
17101788

17111789
@cython.boundscheck(False)
17121790
@cython.wraparound(False)
@@ -2943,3 +3021,4 @@ def group_var_bin(ndarray[float64_t, ndim=2] out,
29433021
(ct * ct - ct))
29443022

29453023
include "join.pyx"
3024+
include "generated.pyx"

pandas/core/algorithms.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import numpy as np
77

88
import pandas.core.common as com
9-
import pandas.lib as lib
10-
import pandas._algos as _algos
9+
import pandas.algos as algos
10+
import pandas.hashtable as htable
1111

1212

1313
def match(to_match, values, na_sentinel=-1):
@@ -70,11 +70,11 @@ def _hashtable_algo(f, dtype):
7070
f(HashTable, type_caster) -> result
7171
"""
7272
if com.is_float_dtype(dtype):
73-
return f(lib.Float64HashTable, com._ensure_float64)
73+
return f(htable.Float64HashTable, com._ensure_float64)
7474
elif com.is_integer_dtype(dtype):
75-
return f(lib.Int64HashTable, com._ensure_int64)
75+
return f(htable.Int64HashTable, com._ensure_int64)
7676
else:
77-
return f(lib.PyObjectHashTable, com._ensure_object)
77+
return f(htable.PyObjectHashTable, com._ensure_object)
7878

7979

8080
def _count_generic(values, table_type, type_caster):
@@ -167,7 +167,7 @@ def value_counts(values, sort=True, ascending=False):
167167

168168
if com.is_integer_dtype(values.dtype):
169169
values = com._ensure_int64(values)
170-
keys, counts = lib.value_count_int64(values)
170+
keys, counts = htable.value_count_int64(values)
171171
result = Series(counts, index=keys)
172172
else:
173173
counter = defaultdict(lambda: 0)
@@ -271,7 +271,7 @@ def _get_score(at):
271271
return _get_score(q)
272272
else:
273273
q = np.asarray(q, np.float64)
274-
return _algos.arrmap_float64(q, _get_score)
274+
return algos.arrmap_float64(q, _get_score)
275275

276276

277277
def _interpolate(a, b, fraction):
@@ -313,19 +313,19 @@ def group_position(*args):
313313

314314

315315
_rank1d_functions = {
316-
'float64': lib.rank_1d_float64,
317-
'int64': lib.rank_1d_int64,
318-
'generic': lib.rank_1d_generic
316+
'float64': algos.rank_1d_float64,
317+
'int64': algos.rank_1d_int64,
318+
'generic': algos.rank_1d_generic
319319
}
320320

321321
_rank2d_functions = {
322-
'float64': lib.rank_2d_float64,
323-
'int64': lib.rank_2d_int64,
324-
'generic': lib.rank_2d_generic
322+
'float64': algos.rank_2d_float64,
323+
'int64': algos.rank_2d_int64,
324+
'generic': algos.rank_2d_generic
325325
}
326326

327327
_hashtables = {
328-
'float64': (lib.Float64HashTable, lib.Float64Vector),
329-
'int64': (lib.Int64HashTable, lib.Int64Vector),
330-
'generic': (lib.PyObjectHashTable, lib.ObjectVector)
328+
'float64': (htable.Float64HashTable, htable.Float64Vector),
329+
'int64': (htable.Int64HashTable, htable.Int64Vector),
330+
'generic': (htable.PyObjectHashTable, htable.ObjectVector)
331331
}

pandas/core/common.py

Lines changed: 53 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
from numpy.lib.format import read_array, write_array
1212
import numpy as np
1313

14-
import pandas._algos as _algos
14+
import pandas.algos as algos
1515
import pandas.lib as lib
16+
import pandas.tslib as tslib
17+
1618
from pandas.util import py3compat
1719
import codecs
1820
import csv
@@ -84,7 +86,7 @@ def _isnull_ndarraylike(obj):
8486
result = Series(result, index=obj.index, copy=False)
8587
elif values.dtype == np.dtype('M8[ns]'):
8688
# this is the NaT pattern
87-
result = values.view('i8') == lib.iNaT
89+
result = values.view('i8') == tslib.iNaT
8890
elif issubclass(values.dtype.type, np.timedelta64):
8991
result = -np.isfinite(values.view('i8'))
9092
else:
@@ -168,43 +170,43 @@ def wrapper(arr, indexer, out, fill_value=np.nan):
168170

169171

170172
_take1d_dict = {
171-
'float64': _algos.take_1d_float64,
172-
'int32': _algos.take_1d_int32,
173-
'int64': _algos.take_1d_int64,
174-
'object': _algos.take_1d_object,
175-
'bool': _view_wrapper(_algos.take_1d_bool, np.uint8),
176-
'datetime64[ns]': _view_wrapper(_algos.take_1d_int64, np.int64,
177-
na_override=lib.iNaT),
173+
'float64': algos.take_1d_float64,
174+
'int32': algos.take_1d_int32,
175+
'int64': algos.take_1d_int64,
176+
'object': algos.take_1d_object,
177+
'bool': _view_wrapper(algos.take_1d_bool, np.uint8),
178+
'datetime64[ns]': _view_wrapper(algos.take_1d_int64, np.int64,
179+
na_override=tslib.iNaT),
178180
}
179181

180182
_take2d_axis0_dict = {
181-
'float64': _algos.take_2d_axis0_float64,
182-
'int32': _algos.take_2d_axis0_int32,
183-
'int64': _algos.take_2d_axis0_int64,
184-
'object': _algos.take_2d_axis0_object,
185-
'bool': _view_wrapper(_algos.take_2d_axis0_bool, np.uint8),
186-
'datetime64[ns]': _view_wrapper(_algos.take_2d_axis0_int64, np.int64,
187-
na_override=lib.iNaT),
183+
'float64': algos.take_2d_axis0_float64,
184+
'int32': algos.take_2d_axis0_int32,
185+
'int64': algos.take_2d_axis0_int64,
186+
'object': algos.take_2d_axis0_object,
187+
'bool': _view_wrapper(algos.take_2d_axis0_bool, np.uint8),
188+
'datetime64[ns]': _view_wrapper(algos.take_2d_axis0_int64, np.int64,
189+
na_override=tslib.iNaT),
188190
}
189191

190192
_take2d_axis1_dict = {
191-
'float64': _algos.take_2d_axis1_float64,
192-
'int32': _algos.take_2d_axis1_int32,
193-
'int64': _algos.take_2d_axis1_int64,
194-
'object': _algos.take_2d_axis1_object,
195-
'bool': _view_wrapper(_algos.take_2d_axis1_bool, np.uint8),
196-
'datetime64[ns]': _view_wrapper(_algos.take_2d_axis1_int64, np.int64,
197-
na_override=lib.iNaT),
193+
'float64': algos.take_2d_axis1_float64,
194+
'int32': algos.take_2d_axis1_int32,
195+
'int64': algos.take_2d_axis1_int64,
196+
'object': algos.take_2d_axis1_object,
197+
'bool': _view_wrapper(algos.take_2d_axis1_bool, np.uint8),
198+
'datetime64[ns]': _view_wrapper(algos.take_2d_axis1_int64, np.int64,
199+
na_override=tslib.iNaT),
198200
}
199201

200202
_take2d_multi_dict = {
201-
'float64': _algos.take_2d_multi_float64,
202-
'int32': _algos.take_2d_multi_int32,
203-
'int64': _algos.take_2d_multi_int64,
204-
'object': _algos.take_2d_multi_object,
205-
'bool': _view_wrapper(_algos.take_2d_multi_bool, np.uint8),
206-
'datetime64[ns]': _view_wrapper(_algos.take_2d_multi_int64, np.int64,
207-
na_override=lib.iNaT),
203+
'float64': algos.take_2d_multi_float64,
204+
'int32': algos.take_2d_multi_int32,
205+
'int64': algos.take_2d_multi_int64,
206+
'object': algos.take_2d_multi_object,
207+
'bool': _view_wrapper(algos.take_2d_multi_bool, np.uint8),
208+
'datetime64[ns]': _view_wrapper(algos.take_2d_multi_int64, np.int64,
209+
na_override=tslib.iNaT),
208210
}
209211

210212

@@ -369,9 +371,9 @@ def mask_out_axis(arr, mask, axis, fill_value=np.nan):
369371
arr[tuple(indexer)] = fill_value
370372

371373
_diff_special = {
372-
'float64': lib.diff_2d_float64,
373-
'int64': lib.diff_2d_int64,
374-
'int32': lib.diff_2d_int32
374+
'float64': algos.diff_2d_float64,
375+
'int64': algos.diff_2d_int64,
376+
'int32': algos.diff_2d_int32
375377
}
376378

377379
def diff(arr, n, axis=0):
@@ -452,21 +454,21 @@ def wrapper(arr, mask, limit=None):
452454
f(view, mask, limit=limit)
453455
return wrapper
454456

455-
_pad_1d_datetime = _interp_wrapper(_algos.pad_inplace_int64, np.int64)
456-
_pad_2d_datetime = _interp_wrapper(_algos.pad_2d_inplace_int64, np.int64)
457-
_backfill_1d_datetime = _interp_wrapper(_algos.backfill_inplace_int64,
457+
_pad_1d_datetime = _interp_wrapper(algos.pad_inplace_int64, np.int64)
458+
_pad_2d_datetime = _interp_wrapper(algos.pad_2d_inplace_int64, np.int64)
459+
_backfill_1d_datetime = _interp_wrapper(algos.backfill_inplace_int64,
458460
np.int64)
459-
_backfill_2d_datetime = _interp_wrapper(_algos.backfill_2d_inplace_int64,
461+
_backfill_2d_datetime = _interp_wrapper(algos.backfill_2d_inplace_int64,
460462
np.int64)
461463

462464

463465
def pad_1d(values, limit=None, mask=None):
464466
if is_float_dtype(values):
465-
_method = _algos.pad_inplace_float64
467+
_method = algos.pad_inplace_float64
466468
elif is_datetime64_dtype(values):
467469
_method = _pad_1d_datetime
468470
elif values.dtype == np.object_:
469-
_method = _algos.pad_inplace_object
471+
_method = algos.pad_inplace_object
470472
else: # pragma: no cover
471473
raise ValueError('Invalid dtype for padding')
472474

@@ -478,11 +480,11 @@ def pad_1d(values, limit=None, mask=None):
478480

479481
def backfill_1d(values, limit=None, mask=None):
480482
if is_float_dtype(values):
481-
_method = _algos.backfill_inplace_float64
483+
_method = algos.backfill_inplace_float64
482484
elif is_datetime64_dtype(values):
483485
_method = _backfill_1d_datetime
484486
elif values.dtype == np.object_:
485-
_method = _algos.backfill_inplace_object
487+
_method = algos.backfill_inplace_object
486488
else: # pragma: no cover
487489
raise ValueError('Invalid dtype for padding')
488490

@@ -495,11 +497,11 @@ def backfill_1d(values, limit=None, mask=None):
495497

496498
def pad_2d(values, limit=None, mask=None):
497499
if is_float_dtype(values):
498-
_method = _algos.pad_2d_inplace_float64
500+
_method = algos.pad_2d_inplace_float64
499501
elif is_datetime64_dtype(values):
500502
_method = _pad_2d_datetime
501503
elif values.dtype == np.object_:
502-
_method = _algos.pad_2d_inplace_object
504+
_method = algos.pad_2d_inplace_object
503505
else: # pragma: no cover
504506
raise ValueError('Invalid dtype for padding')
505507

@@ -516,11 +518,11 @@ def pad_2d(values, limit=None, mask=None):
516518

517519
def backfill_2d(values, limit=None, mask=None):
518520
if is_float_dtype(values):
519-
_method = _algos.backfill_2d_inplace_float64
521+
_method = algos.backfill_2d_inplace_float64
520522
elif is_datetime64_dtype(values):
521523
_method = _backfill_2d_datetime
522524
elif values.dtype == np.object_:
523-
_method = _algos.backfill_2d_inplace_object
525+
_method = algos.backfill_2d_inplace_object
524526
else: # pragma: no cover
525527
raise ValueError('Invalid dtype for padding')
526528

@@ -903,11 +905,11 @@ def _is_sequence(x):
903905
except Exception:
904906
return False
905907

906-
_ensure_float64 = _algos.ensure_float64
907-
_ensure_int64 = _algos.ensure_int64
908-
_ensure_int32 = _algos.ensure_int32
909-
_ensure_platform_int = _algos.ensure_platform_int
910-
_ensure_object = _algos.ensure_object
908+
_ensure_float64 = algos.ensure_float64
909+
_ensure_int64 = algos.ensure_int64
910+
_ensure_int32 = algos.ensure_int32
911+
_ensure_platform_int = algos.ensure_platform_int
912+
_ensure_object = algos.ensure_object
911913

912914

913915
def _astype_nansafe(arr, dtype):
@@ -916,7 +918,7 @@ def _astype_nansafe(arr, dtype):
916918

917919
if issubclass(arr.dtype.type, np.datetime64):
918920
if dtype == object:
919-
return lib.ints_to_pydatetime(arr.view(np.int64))
921+
return tslib.ints_to_pydatetime(arr.view(np.int64))
920922
elif (np.issubdtype(arr.dtype, np.floating) and
921923
np.issubdtype(dtype, np.integer)):
922924

0 commit comments

Comments
 (0)