Unify the per-day and index-symbol filter logic into minmaxFilter()

susan · susan · commit ebc2e91987e8 · 2022-01-09T10:26:16.000-08:00
diff --git a/get_besteu.py b/get_besteu.py
@@ -4,6 +4,7 @@
 import logging
 import pandas as pd
 import opchain
+import time
 
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
@@ -12,9 +13,9 @@ def eprint(*args, **kwargs):
 MIN_DMU2 = -8.0
 MIN_DME_U = -999.0
 MIN_MG = 0.5
-ODD_DAY_SYMBOLS = ["DJI.C", "DIA", "$SPX.X", "SPY", "$NDX.X", "QQQ", "$RUT.X", "IWM", "$VIX.X", "VXX"]
+ODD_DAY_SYMBOLS = ["DJI.C", "DIA", "$SPX.X", "SPY", "$NDX.X", "QQQ", "$RUT.X", "IWM", "$VIX.X", "VXX", "EWZ"] #brazil
 MIN_DAY = 38
-MAX_DAY = 112
+MAX_DAY = 78
 MIN_EW = -1.0
 MIN_DME_W = 0.0
 NUM_EWS = 30
@@ -37,12 +38,12 @@ def eprint(*args, **kwargs):
                    "dme",
                    #"DME_u",
                    "dme_w",
+                   "e_w",
+                   "mg_w",
                    "pop",
                    # "popt",
                    "width",
                    #"e_u",
-                   "e_w",
-                   "mg_w",
                    #"eml_u",
                    #"ml_u",
                    #"mg_u",
@@ -130,7 +131,7 @@ def getBestEw(df, daysToExpiration=None, count=1):
     return rows
  
 
-def getBestEws(stocklist_file, rows, run_date=None, exp_days=None):
+def getBestEUs(stocklist_file, rows, run_date=None, exp_days=None):
     if not os.path.isfile(stocklist_file):
         eprint(f"{stocklist_file} not found")
         sys.exit(1)
@@ -183,6 +184,46 @@ def getBestEws(stocklist_file, rows, run_date=None, exp_days=None):
     eprint(f"got data for {len(symbols)} symbols {symbols} from file: {stocklist_file}")
 
 
+def minmaxFilter(df, use_odd_day_symbols=False):
+    """Filter df by the module thresholds and sort the result by dme_w.
+
+    Keeps rows with MIN_DAY <= days_exp <= MAX_DAY, e_w > MIN_EW,
+    dmu >= MIN_DMU, dmu2 >= MIN_DMU2, dme_w > MIN_DME_W and mg >= MIN_MG;
+    returns early with the empty frame when nothing passes.  Otherwise keeps
+    only ODD_DAY_SYMBOLS rows if use_odd_day_symbols is true (excludes them
+    if false) and returns the rows sorted by dme_w, highest first.
+    """
+    logging.info(f"df minmaxstart:  {len(df)} rows")
+    df = df[(df.days_exp >= MIN_DAY) & (df.days_exp <= MAX_DAY)]
+    logging.info(f"len after min/max days: {len(df)}")
+    df = df[df.e_w > MIN_EW]
+    logging.info(f"len after > MIN_EW: {len(df)}")
+    df = df[df.dmu >= MIN_DMU]
+    logging.info(f"len after >= MIN_DMU: {len(df)}")
+    df = df[df.dmu2 >= MIN_DMU2]
+    logging.info(f"len after >= MIN_DMU2: {len(df)}")
+    logging.debug(f"dme_w max: {df.dme_w.max()}")
+    logging.debug(f"dme_w min: {df.dme_w.min()}")
+    df = df[df.dme_w > MIN_DME_W]
+    logging.info(f"len after > MIN_DME_W: {len(df)}")
+    df = df[df.mg >= MIN_MG]
+    logging.info(f"len after >= MIN_MG: {len(df)}")
+    if df.empty:
+        logging.info("no rows, returning empty dataframe")
+        return df
+
+    eprint("df minmaxstart start filter symbols:", len(df))
+    is_odd_day = df.symbol.isin(ODD_DAY_SYMBOLS)  # one mask serves both branches
+    if use_odd_day_symbols:
+        df = df[is_odd_day]
+    else:
+        eprint("filtered symbols")
+        df = df[~is_odd_day]
+    eprint("trimed df:", len(df))
+    # Highest dme_w first so the best candidates lead the output.
+    return df.sort_values(by="dme_w", ascending=False)
+     
+
 # main
 #
 if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'):
@@ -218,27 +259,29 @@ def getBestEws(stocklist_file, rows, run_date=None, exp_days=None):
 
 # assume it's a csv file of symbols
 rows = []
+start_time = time.time()
+eprint("getBestEUs start")
 for csv_file in csv_files:
-    getBestEws(csv_file, rows, run_date=run_date, exp_days=exp_days)
-     
+    getBestEUs(csv_file, rows, run_date=run_date, exp_days=exp_days)
+eprint(f"getBestEUs done - {int(time.time() - start_time)}")     
 if not rows:
     eprint("no rows found!")
     sys.exit()
 # row = rows[0]
 # columns = list(row.keys())
-df = pd.DataFrame(rows, columns=BEST_EW_COLUMNS)
+df = pd.DataFrame(rows, columns=BEST_EW_COLUMNS, )
 
 days = df['days_exp']
 print(df.columns)
 print("days:", days)
-print("fow count:", len(df))
+print("row count:", len(df))
 days = list(set(list(days.values)))
 days.sort()
 
-
-
 if out_dir:
     original_stdout = sys.stdout # Save a reference to the original standard output
+    eprint(f"days start  - {int(time.time() - start_time)}")     
+
     for day in days:  
         if day < MIN_DAY:
             #eprint(f"{day} less than {MIN_DAY}, skipping")
@@ -248,47 +291,32 @@ def getBestEws(stocklist_file, rows, run_date=None, exp_days=None):
             continue
         logging.info(f"running day: {day}")
         df_day = df[df.days_exp == day]
-        eprint(f"df_day: {len(df_day)} rows")
+        logging.info(f"df_day: {len(df_day)} rows")
         if len(df_day.index) == 0:
-            eprint("no rows")
+            logging.info("no rows")
             continue # no rows
-        df_day = df_day[df_day.e_w > MIN_EW]
-        logging.info(f"df_day pod e_w: {len(df_day)} rows")
-
-        df_day = df_day.drop(df_day[df_day.dmu <= MIN_DMU].index) 
-        df_day = df_day.drop(df_day[df_day.dmu <= MIN_DMU].index)
-        df_day = df_day.drop(df_day[df_day.dmu2 <= MIN_DMU2].index)
-        # df_day = df_day.drop(df_day[df_day.dme_u <= MIN_DME_U].index)
-        df_day = df_day.drop(df_day[df_day.dme_w <= MIN_DME_W].index)
-        df_day = df_day.drop(df_day[df_day.mg <= MIN_MG].index)
-        for symbol in ODD_DAY_SYMBOLS:
-            df_day = df_day.drop(df_day[df_day.symbol == symbol].index)
-        df_day = df_day.sort_values(by="dme_w", ascending=False)  # DME_u
-
+        df_day = minmaxFilter(df_day)
+         
         if len(df_day.index) > 0:
             filename = f"{out_dir}/best_ew_{run_date}_{day}.csv"
             df_day = df_day.rename(columns=RENAME_COLUMNS)
             with open(filename, 'w') as f:
                 sys.stdout = f # Change the standard output to the file we created.
                 output = df_day.to_csv(float_format="%.2f")
                 print(output)
+    eprint(f"days done  - {int(time.time() - start_time)}")     
+
     # odd days file
     filename = f"{out_dir}/best_ew_{run_date}_index.csv"
-    df_odd = df[df["symbol"].isin(ODD_DAY_SYMBOLS)]
-    df_odd = df_odd[df_odd.e_w > MIN_EW]
-    df_odd = df_odd.drop(df_odd[df_odd.dmu <= MIN_DMU].index)
-    df_odd = df_odd.drop(df_odd[df_odd.dmu2 <= MIN_DMU2].index)
-    #    df_odd = df_odd.drop(df_odd[df_odd.dme_u <= MIN_DME_U].index)
-    df_odd = df_odd.drop(df_odd[df_odd.dme_w <= MIN_DME_W].index)
-    df_odd = df_odd.drop(df_odd[df_odd.mg <= MIN_MG].index)
-    df_odd = df_odd.drop(df_odd[df_odd["days_exp"] < MIN_DAY].index)
-    df_odd = df_odd.drop(df_odd[df_odd["days_exp"] > MAX_DAY].index)
-    df_odd = df_odd.sort_values(by="symbol", ascending=False)
+    df = pd.DataFrame(rows, columns=BEST_EW_COLUMNS)
+    df_odd = minmaxFilter(df, use_odd_day_symbols=True)
     df_odd = df_odd.rename(columns=RENAME_COLUMNS)
     with open(filename, 'w') as f:
         sys.stdout = f # Change the standard output to the file we created.
         output = df_odd.to_csv(float_format="%.2f")
         print(output)
+    eprint(f"odds done  - {int(time.time() - start_time)}")     
+
 
     sys.stdout = original_stdout # Reset the standard output to its original value
 else:
diff --git a/opchain/opchain.py b/opchain/opchain.py
@@ -29,9 +29,9 @@
 MIN_VAL = -999.0 
 DEFAULT_DAYS = 45
 CSR_CS_DELTA_RANGE = (0.1, 0.17)     #was .18
-CSR_CB_DELTA_RANGE = (0.005, 0.17)      
+CSR_CB_DELTA_RANGE = (0.009, 0.17)      
 PSR_PS_DELTA_RANGE = (0.1, 0.14)    #.15
-PSR_PB_DELTA_RANGE = (0.005, 0.14)   
+PSR_PB_DELTA_RANGE = (0.009, 0.14)   
 USE_PRICE = "mark"
 
 def eprint(*args, **kwargs):
@@ -45,8 +45,9 @@ def get_today():
     
 
 def get_chains(symbol, run_date=None, dt_min=None, dt_max=None, reload=False):
-    logging.info(f"get_chains {symbol}, run_date: {run_date}")
+    logging.info(f"get_chains {symbol}, run_date: {run_date} reload=True")
     now = time.time()
+    logging.info(f"start time: {int(now)}")
     if dt_min is None:
         # use current time
         dt_min = datetime.fromtimestamp(now)
@@ -83,6 +84,7 @@ def get_chains(symbol, run_date=None, dt_min=None, dt_max=None, reload=False):
     # params["daysToExpiration"] = 45
     req = "https://api.tdameritrade.com/v1/marketdata/chains"
     rsp = requests.get(req, params=params, headers=headers)
+    logging.info(f"making request to tdameritrade: {req}")
     if rsp.status_code != 200:
         logging.error(f"got bad status code: {rsp.status_code}")
         return None
@@ -501,6 +503,8 @@ def get_candidates(contracts, putCall=None, sell_range=None, buy_range=None, day
         columns.append(name)
           
     candidate_rows = []
+    logging.info(f"get_candidates - iteratting over: {len(contracts)} rows")
+    start = time.time()
     
     for i, b in contracts.iterrows(): 
         if b.daysToExpiration != daysToExpiration:
@@ -513,9 +517,11 @@ def get_candidates(contracts, putCall=None, sell_range=None, buy_range=None, day
             logging.debug(f"skipping row {i}, b delta {b.delta} out of range: {buy_range}")
             continue
 
+        logging.info(f"get_candidates - inner iteration loop: {len(contracts)} rows")
         for j, s in contracts.iterrows():
             if i == j:
                 continue
+            
             if s.daysToExpiration != daysToExpiration:
                 logging.debug(f"skipping sell row {j}, daysToExpiration[{s.daysToExpiration}] != {daysToExpiration}")
                 continue
@@ -564,6 +570,7 @@ def get_candidates(contracts, putCall=None, sell_range=None, buy_range=None, day
 
     #candidates['pom'] = 1 - abs(candidates['s_delta'])
     logging.info(f"get_candidates, returning {len(candidates)} candidates from {len(contracts)} contracts")
+    logging.info(f"time spent for {len(contracts)}: {(time.time() - start):.2f}")
     candidates = candidates.sort_values(by="e_w", ascending=False)
     return candidates
     
diff --git a/xetfs.csv b/xetfs.csv
@@ -6,4 +6,5 @@ $RUT.X
 $SPX.X
 SPY
 $VIX.X
-VXX
+VXX
+EWZ
diff --git a/xstocks.csv b/xstocks.csv
@@ -1,18 +1,17 @@
 AAPL
 ABBV
-ADBE
+#ADBE
 ALGN
 AMD
 AMZN
 CMG
-CNC
-EWZ
+#CNC
 FB
 GOOG
 GOOGL
 GS
 ISRG
-KHC
+#KHC
 LMT
 LRCX
 LULU
@@ -22,7 +21,7 @@ MS
 NFLX
 NVDA
 SHOP
-SIG
+#SIG
 TSLA
 UNH
 UVXY

-Original file line number
+Diff line change
 $SPX.X
 SPY
 $VIX.X
 -VXX
 +VXX
 +EWZ