@@ -2000,15 +2000,13 @@ def sasdata2dataframe(self, table: str, libref: str = '', dsopts: dict = None,
2000
2000
return df
2001
2001
2002
2002
def sd2pq (self , parquet_file_path : str , table : str , libref : str = '' , dsopts : dict = None ,
2003
- pa_parquet_kwargs = {"compression" : 'snappy' ,
2004
- "flavor" :"spark" ,
2005
- "write_statistics" :False },
2006
- pa_pandas_kwargs = {},
2007
- partitioned = False ,
2003
+ pa_parquet_kwargs = None ,
2004
+ pa_pandas_kwargs = None ,
2005
+ partitioned = False ,
2008
2006
partition_size_mb = 128 ,
2009
- chunk_size_mb = 4 ,
2010
- coerce_timestamp_errors = True ,
2011
- static_columns :list = None ,
2007
+ chunk_size_mb = 4 ,
2008
+ coerce_timestamp_errors = True ,
2009
+ static_columns :list = None ,
2012
2010
rowsep : str = '\x01 ' , colsep : str = '\x02 ' ,
2013
2011
rowrep : str = ' ' , colrep : str = ' ' ,
2014
2012
** kwargs ) -> None :
@@ -2055,13 +2053,19 @@ def sd2pq(self, parquet_file_path: str, table: str, libref: str ='', dsopts: dic
2055
2053
2056
2054
:return: None
2057
2055
"""
2058
- dsopts = dsopts if dsopts is not None else {}
2056
+ dsopts = dsopts if dsopts is not None else {}
2057
+ parquet_kwargs = pa_parquet_kwargs if pa_parquet_kwargs is not None else {"compression" : 'snappy' ,
2058
+ "flavor" :"spark" ,
2059
+ "write_statistics" :False
2060
+ }
2061
+ pandas_kwargs = pa_pandas_kwargs if pa_pandas_kwargs is not None else {}
2062
+
2059
2063
return self .sasdata2parquet (parquet_file_path = parquet_file_path ,
2060
2064
table = table ,
2061
2065
libref = libref ,
2062
2066
dsopts = dsopts ,
2063
- pa_parquet_kwargs = pa_parquet_kwargs ,
2064
- pa_pandas_kwargs = pa_pandas_kwargs ,
2067
+ pa_parquet_kwargs = parquet_kwargs ,
2068
+ pa_pandas_kwargs = pandas_kwargs ,
2065
2069
partitioned = partitioned ,
2066
2070
partition_size_mb = partition_size_mb ,
2067
2071
chunk_size_mb = chunk_size_mb ,
@@ -2077,17 +2081,15 @@ def sd2pq(self, parquet_file_path: str, table: str, libref: str ='', dsopts: dic
2077
2081
def sasdata2parquet (self ,
2078
2082
parquet_file_path : str ,
2079
2083
table : str ,
2080
- libref : str = '' ,
2081
- dsopts : dict = None ,
2082
- pa_parquet_kwargs = {"compression" : 'snappy' ,
2083
- "flavor" :"spark" ,
2084
- "write_statistics" :False },
2085
- pa_pandas_kwargs = {},
2086
- partitioned = False ,
2084
+ libref : str = '' ,
2085
+ dsopts : dict = None ,
2086
+ pa_parquet_kwargs = None ,
2087
+ pa_pandas_kwargs = None ,
2088
+ partitioned = False ,
2087
2089
partition_size_mb = 128 ,
2088
- chunk_size_mb = 4 ,
2089
- coerce_timestamp_errors = True ,
2090
- static_columns :list = None ,
2090
+ chunk_size_mb = 4 ,
2091
+ coerce_timestamp_errors = True ,
2092
+ static_columns :list = None ,
2091
2093
rowsep : str = '\x01 ' ,
2092
2094
colsep : str = '\x02 ' ,
2093
2095
rowrep : str = ' ' ,
@@ -2139,6 +2141,12 @@ def sasdata2parquet(self,
2139
2141
lastlog = len (self ._io ._log )
2140
2142
2141
2143
dsopts = dsopts if dsopts is not None else {}
2144
+ parquet_kwargs = pa_parquet_kwargs if pa_parquet_kwargs is not None else {"compression" : 'snappy' ,
2145
+ "flavor" :"spark" ,
2146
+ "write_statistics" :False
2147
+ }
2148
+ pandas_kwargs = pa_pandas_kwargs if pa_pandas_kwargs is not None else {}
2149
+
2142
2150
if self .exist (table , libref ) == 0 :
2143
2151
logger .error ('The SAS Data Set ' + libref + '.' + table + ' does not exist' )
2144
2152
if self .sascfg .bcv < 3007009 :
@@ -2154,8 +2162,8 @@ def sasdata2parquet(self,
2154
2162
table = table ,
2155
2163
libref = libref ,
2156
2164
dsopts = dsopts ,
2157
- pa_parquet_kwargs = pa_parquet_kwargs ,
2158
- pa_pandas_kwargs = pa_pandas_kwargs ,
2165
+ pa_parquet_kwargs = parquet_kwargs ,
2166
+ pa_pandas_kwargs = pandas_kwargs ,
2159
2167
partitioned = partitioned ,
2160
2168
partition_size_mb = partition_size_mb ,
2161
2169
chunk_size_mb = chunk_size_mb ,
0 commit comments