Skip to content

Commit

Permalink
More robust conversion of chromosome numbers to int
Browse files Browse the repository at this point in the history
  • Loading branch information
omerwe committed Dec 9, 2024
1 parent 9a99a8a commit 8b17b4e
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion ldsc_polyfun/jackknife.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def __init__(self, x, y, n_blocks=None, separators=None, chr_num=None, verbose=T
num_lambdas=100, approx_ridge=False,
ridge_lambda=None, use_1se=False, has_intercept=False, standardize=True,
skip_ridge_jackknife=True, num_chr_sets=2, num_chr=22):

#sanity checks
assert chr_num is not None
# # # chr_num[:100000]=1
Expand Down
12 changes: 10 additions & 2 deletions ldsc_polyfun/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,20 @@ def read_csv(fh, **kwargs):
return df

def set_snpid_index(df):

def float_to_int(c):
    """Convert a chromosome label to ``int`` when it is numeric.

    Values accepted by ``int()`` (ints, floats such as ``1.0``, strings such
    as ``"7"``) are converted exactly as before. In addition, strings that
    spell an integral float (e.g. ``"1.0"``) are converted, so that they
    yield the same SNP id prefix as ``1``. Anything else (``"X"``, ``None``,
    NaN, infinities, ...) is returned unchanged instead of raising.

    Args:
        c: A single chromosome value of any type.

    Returns:
        The value as an ``int`` when a conversion is possible, otherwise
        ``c`` itself, untouched.
    """
    try:
        return int(c)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric string or NaN; TypeError: None and other
        # unsupported types; OverflowError: +/-inf. Fall through to the
        # float-string check below.
        pass

    try:
        as_float = float(c)
    except (ValueError, TypeError, OverflowError):
        return c

    # Only collapse values that are exactly integral; NaN/inf report
    # is_integer() == False and are therefore returned unchanged.
    if as_float.is_integer():
        return int(as_float)
    return c

df['A1_first'] = (df['A1'] < df['A2']) | (df['A1'].str.len()>1) | (df['A2'].str.len()>1)
df['A1s'] = df['A2'].copy()
df.loc[df['A1_first'], 'A1s'] = df.loc[df['A1_first'], 'A1'].copy()
df['A2s'] = df['A1'].copy()
df.loc[df['A1_first'], 'A2s'] = df.loc[df['A1_first'], 'A2'].copy()
s_chr = df['CHR'].map(lambda c: int(c) if str(c)[0] in ['0','1','2','3','4','5,','6','7','8','9'] else c).astype(str)
s_chr = df['CHR'].map(float_to_int).astype(str)
s_bp = df['BP'].astype(int).astype(str)
df.index = s_chr + '.' + s_bp + '.' + df['A1s'] + '.' + df['A2s']
df.index.name = 'snpid'
Expand Down Expand Up @@ -116,7 +124,7 @@ def sumstats(fh, alleles=True, dropna=True):
if dropna:
x = x.dropna(how='any')

x = set_snpid_index(x)
x = set_snpid_index(x)
x.drop(columns=['CHR', 'BP'], inplace=True)


Expand Down
3 changes: 2 additions & 1 deletion ldsc_polyfun/sumstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def _print_part_delete_values(ldscore_reg, ofh, log):

def _merge_and_log(ld, sumstats, noun, log):
'''Wrap smart merge with log messages about # of SNPs.'''
sumstats = smart_merge(ld, sumstats)
sumstats = smart_merge(ld, sumstats)
msg = 'After merging with {F}, {N} SNPs remain.'
if len(sumstats) == 0:
msg += ' Please make sure that your annotation files include the SNPs in your sumstats files (please see the PolyFun wiki for details on downloading functional annotations)'
Expand Down Expand Up @@ -275,6 +275,7 @@ def _read_ld_sumstats(args, log, fh, alleles=True, dropna=True):

M_annot, ref_ld, novar_cols = _check_variance(log, M_annot, ref_ld)
w_ld = _read_w_ld(args, log)

sumstats = _merge_and_log(ref_ld, sumstats, 'reference panel LD', log)
sumstats = _merge_and_log(sumstats, w_ld, 'regression SNP LD', log)
w_ld_cname = sumstats.columns[-1]
Expand Down
2 changes: 1 addition & 1 deletion polyfun.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def run_ldsc(self, args, use_ridge, nn, keep_large, evenodd_split, n_blocks=2):
df_sumstats = pd.read_table(args.sumstats, sep='\s+')
###merge everything together...

#prepare LD-scores for S-LDSC run
#prepare LD-scores for S-LDSC run
ref_ld = np.array(df_sumstats[ref_ld_cnames], dtype=np.float32)
sumstats._check_ld_condnum(args, log, ref_ld_cnames)
if df_sumstats.shape[0] < 200000:
Expand Down

0 comments on commit 8b17b4e

Please sign in to comment.