
Commit b412f27

*): Update for coding style.
1 parent: cec96af

File tree: 10 files changed (+38, -38 lines)

Every hunk in this commit is a coding-style cleanup: trailing whitespace stripped from a line, or a missing newline added at end of file. Each removed (-) line below therefore differs from its added (+) line only in invisible whitespace.

datasets/openwebtext/create_tfrecords.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -50,7 +50,7 @@ def create_file(args):
             return
         if os.path.exists(os.path.join(output_dir, s)): # Unfinished file, remove
             os.remove(os.path.join(output_dir, s))
-
+
         with tf.python_io.TFRecordWriter(os.path.join(output_dir, s)) as writer:
             good_files = 0
             current = None
@@ -85,4 +85,4 @@ def create_file(args):

     end = time.time()

-    print("Done! In {:.2f}s, {} / {} good files.".format(end-start, str(good), str(len(files))))
+    print("Done! In {:.2f}s, {} / {} good files.".format(end-start, str(good), str(len(files))))
```

download_model.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -49,4 +49,4 @@
     # 1k for chunk_size, since Ethernet packet size is around 1500 bytes
     for chunk in r.iter_content(chunk_size=chunk_size):
         f.write(chunk)
-        pbar.update(chunk_size)
+        pbar.update(chunk_size)
```
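The line being cleaned up sits in a chunked-download loop. A minimal sketch of that pattern, assuming a requests streaming response and a tqdm progress bar; `url` and `out_path` are illustrative names, not from the repo:

```python
import requests
from tqdm import tqdm

def download(url, out_path, chunk_size=1000):
    r = requests.get(url, stream=True)
    total = int(r.headers.get("content-length", 0))
    with open(out_path, "wb") as f, tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # 1k chunks: an Ethernet packet is around 1500 bytes, so one read
        # corresponds roughly to one packet
        for chunk in r.iter_content(chunk_size=chunk_size):
            f.write(chunk)
            pbar.update(len(chunk))  # len(chunk) avoids overcounting the final chunk
```

Updating by `len(chunk)` rather than `chunk_size` keeps the bar exact when the last chunk is short.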

experimental/experiments.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -75,7 +75,7 @@ def get_value(self):
                 return val
             else:
                 raise RuntimeError("{} ran out of values!".format(self.name))
-
+
         # Sample randomly from a list of values
         elif self.distribution == "sample":
             return random.sample(self.values)
@@ -111,7 +111,7 @@ def generate_experiments(base, parameters, number):
             ex[p.name] = p.get_value()

         experiments.append(ex)
-
+
     return experiments

 parameters = [
```
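A minimal sketch of the hyperparameter-sampling pattern these hunks edit, assuming a Parameter holds a name, a distribution type, and a list of candidate values; the class shape is inferred from the diff, not copied from the repo:

```python
import random

class Parameter:
    def __init__(self, name, distribution, values):
        self.name = name
        self.distribution = distribution
        self.values = values

    def get_value(self):
        if self.distribution == "sample":
            # Draw a single candidate at random
            return random.choice(self.values)
        raise RuntimeError("{} has unknown distribution!".format(self.name))

def generate_experiments(base, parameters, number):
    experiments = []
    for _ in range(number):
        ex = dict(base)  # copy the shared base config
        for p in parameters:
            ex[p.name] = p.get_value()
        experiments.append(ex)
    return experiments
```

(`random.sample` as called in the diff requires a second argument `k`; `random.choice` is the single-draw equivalent, which is why this sketch uses it.)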

experimental/overrunner.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -39,7 +39,7 @@ def save(runners):
     states = []
     for r in runners:
         states.append(r.dump_dict())
-
+
     with open("logs/state.json", "w") as f:
         json.dump(states, f)

@@ -51,7 +51,7 @@ def save(runners):
         if ts.done:
             runners.remove(ts)
             continue
-
+
         ts.update_state()
         logging.info("{} - TPU State: {} - Process Running: {}".format(ts.prefix, ts.state, ts.task_running))

@@ -86,7 +86,7 @@ def save(runners):

         if ts.running_time > 60*60*24: # Make a hard checkpoint save every day
             logging.info("Backing up {}".format(ts.prefix))
-            subprocess.call(["gsutil", "cp", "-r", ts.params["model_dir"],
+            subprocess.call(["gsutil", "cp", "-r", ts.params["model_dir"],
                 os.path.join(backup_path, ts.params["model_dir"].split("/")[-1] + "-" + str(ts.current_save))])
             ts.current_save += 1

@@ -113,4 +113,4 @@ def save(runners):
             ts.kill_current_task()
         except Exception as e:
             logging.error(e)
-    save(all_runners)
+    save(all_runners)
```
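The first hunk sits in a checkpointing helper that serializes every runner to JSON. A minimal sketch of that save pattern; `dump_dict` comes from the diff, while the surrounding Runner objects are assumed:

```python
import json

def save(runners):
    # Each runner serializes itself to a plain dict; persisting the whole
    # list lets a restarted supervisor rebuild its state from disk.
    states = [r.dump_dict() for r in runners]
    with open("logs/state.json", "w") as f:
        json.dump(states, f)
```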

experimental/tpu_survival.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -42,7 +42,7 @@ def __init__(self, project=None, location=None, id=None, params=None, d=None):
         self.done = d["done"]


-
+
         # current running job
         self.current_process = None
         self.state = None
@@ -250,4 +250,4 @@ def delete_tpu(project, location, tpu_name):
     request = service.projects().locations().nodes().delete(
         name=name)

-    return request.execute()
+    return request.execute()
```
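The second hunk ends a helper that deletes a TPU node through the REST API. A minimal sketch assuming google-api-python-client's discovery interface; the service and version strings are the standard Cloud TPU API names, not taken from this diff:

```python
from googleapiclient import discovery

def delete_tpu(project, location, tpu_name):
    service = discovery.build("tpu", "v1")
    # The API expects the fully qualified node name
    name = "projects/{}/locations/{}/nodes/{}".format(project, location, tpu_name)
    request = service.projects().locations().nodes().delete(name=name)
    return request.execute()
```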

inputs.py

Lines changed: 7 additions & 7 deletions

```diff
@@ -44,7 +44,7 @@ def generic_text(params):
     datasets = [bpe_text(params["batch_size"], dataset[0], amount=params["n_ctx"], iterations=params["iterations"], stitch=params["stitch"], batch=False)
                 for dataset in params["dataset"]]
     weights = [dataset[1] for dataset in params["dataset"]]
-
+
     dataset = tf.data.experimental.sample_from_datasets(datasets, weights=weights)
     dataset = dataset.batch(params["batch_size"], drop_remainder=True).prefetch(params["iterations"] * 2)

@@ -65,9 +65,9 @@ def _parse_function(example_proto):
     dataset = dataset.map(_parse_function, num_parallel_calls=1).shuffle(1000 * stitch)

     # Since samples can be less than the correct length, and TPUs don't like variable lengths, this function stitches together enough samples
-    # to have a text at least 1024 tokens long. For this to work the stitch parameter must be correctly tuned so that
+    # to have a text at least 1024 tokens long. For this to work the stitch parameter must be correctly tuned so that
     # stitch * min(characters_in_text) >= amount
-    def _stitch_text(x, y):
+    def _stitch_text(x, y):
         x = tf.sparse.to_dense(x)

         def _get_x(i):
@@ -76,7 +76,7 @@ def _get_x(i):
         out = _get_x(0)
         for i in range(1, stitch):
             out = tf.concat([out, [50256], _get_x(i)], axis=0) # text1<|endoftext|>text2
-
+
         return out

     # Hack-y way to stitch together multiple texts
@@ -113,6 +113,6 @@ def gpt2_pred_input(params, text=None):
     tokens = enc.encode(text)
     if len(tokens) > 1024:
         tokens = tokens[:1024]
-    t = tf.broadcast_to(tokens, [params["batch_size"], len(tokens)])
-    dataset = tf.data.Dataset.from_tensors(t)
-    return dataset
+    t = tf.broadcast_to(tokens, [params["batch_size"], len(tokens)])
+    dataset = tf.data.Dataset.from_tensors(t)
+    return dataset
```
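The comment in the second hunk describes the stitching trick: TPUs need fixed-length inputs, so short samples are joined with the <|endoftext|> token (id 50256) until at least `amount` tokens are available. A plain-Python sketch of the idea, not the repo's TF implementation:

```python
ENDOFTEXT = 50256  # GPT-2's <|endoftext|> token id

def stitch_tokens(samples, amount):
    out = list(samples[0])
    for s in samples[1:]:
        out += [ENDOFTEXT] + list(s)  # text1<|endoftext|>text2
    # The caller tunes `stitch` so that stitch * min(len(sample)) >= amount
    assert len(out) >= amount, "not enough samples stitched to reach target length"
    return out[:amount]
```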

main.py

Lines changed: 6 additions & 6 deletions

```diff
@@ -15,7 +15,7 @@
 # This program was designed to function with multiple kinds of models, but currently only GPT2 is supported
 # The first element in the tupel is the model function, the second is the function called when predicting
 models = {
-    "GPT2": (gpt2_model, gpt2_predict)
+    "GPT2": (gpt2_model, gpt2_predict)
 }

 inputs = {
@@ -46,8 +46,8 @@
 elif args.predict_file is not None and args.predict_text is not None:
     print("ERROR: Specify exactly one of --predict_file and --predict_text!")
     sys.exit()
-
-
+
+
 # Setup logging
 Path("logs").mkdir(exist_ok=True)
 tf.logging.set_verbosity(logging.INFO)
@@ -91,7 +91,7 @@
         cluster=tpu_cluster_resolver,
         save_checkpoints_secs=60*30,
         session_config=tf.ConfigProto(
-            # allow_soft_placement=True,
+            # allow_soft_placement=True,
             # log_device_placement=True
         ),
         tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=params["iterations"])
@@ -138,15 +138,15 @@
     network.train(
         input_fn=partial(input_fn, eval=False),
         steps=params["train_steps"])
-
+

     end = time.time()
     logger.info("\nTrain loop took {:.2f}s\n".format(end-start))

     eval_result = network.evaluate(
         input_fn=partial(input_fn, eval=True),
         steps=params["eval_steps"])
-
+
     logger.info("\nEval Results: {}\n".format(str(eval_result)))

     if network.get_variable_value("global_step") > params["max_steps"]:
```
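The last hunk is inside the alternating train/eval loop. A condensed sketch of that loop, assuming `network`, `input_fn`, `params`, and `logger` are set up as elsewhere in main.py and that the loop repeats until the global step passes `max_steps`:

```python
import time
from functools import partial

while True:
    start = time.time()
    network.train(input_fn=partial(input_fn, eval=False),
                  steps=params["train_steps"])
    logger.info("Train loop took {:.2f}s".format(time.time() - start))

    eval_result = network.evaluate(input_fn=partial(input_fn, eval=True),
                                   steps=params["eval_steps"])
    logger.info("Eval results: {}".format(eval_result))

    # Stop once the global step recorded in the checkpoint passes the cap
    if network.get_variable_value("global_step") > params["max_steps"]:
        break
```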

model_fns.py

Lines changed: 7 additions & 7 deletions

```diff
@@ -18,19 +18,19 @@ def gpt2_model(features, labels, mode, params):
                             train=mode==tf.estimator.ModeKeys.TRAIN)

         output["logits"] = tf.cast(output["logits"], tf.float32)
-
+
     else:
         output = gpt2.model(X=features, params=params,
                             labels=labels,
                             past=None, reuse=tf.AUTO_REUSE,
                             train=mode==tf.estimator.ModeKeys.TRAIN)
-
+
     loss_batch = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output["logits"], labels=labels)
     loss = tf.reduce_mean(loss_batch)

     if mode == tf.estimator.ModeKeys.TRAIN:
         train_op = create_train_op(loss, params)
-
+
         if params["use_tpu"]:
             return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op)
         else:
@@ -42,10 +42,10 @@ def gpt2_model(features, labels, mode, params):

     if params["use_tpu"]:
         # Metric inputs are transferred to CPU and must preserve batch dimension
-        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
+        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
             loss=loss, eval_metrics=(perplexity_metric, {"loss": loss_batch}))
     else:
-        return tf.estimator.EstimatorSpec(mode=mode,
+        return tf.estimator.EstimatorSpec(mode=mode,
             loss=loss, eval_metric_ops=perplexity_metric(loss_batch))


@@ -61,12 +61,12 @@ def gpt2_model(features, labels, mode, params):
         batch_size=params["batch_size"],
         temperature=1.0, top_k=params["top_k"]
     )
-
+
     predictions = {
         "tokens": output
     }

     if params["use_tpu"]:
         return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)
     else:
-        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
+        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
```
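All three hunks live in one Estimator model_fn that serves TRAIN, EVAL, and PREDICT, returning a TPU or CPU spec depending on `params["use_tpu"]`. A skeleton of that pattern under the TF 1.x API; `forward` and `predict_tokens` are hypothetical stand-ins for the repo's gpt2 calls:

```python
import tensorflow as tf

def model_fn(features, labels, mode, params):
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {"tokens": predict_tokens(features, params)}  # hypothetical sampler
        if params["use_tpu"]:
            return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    logits = forward(features, params)  # hypothetical forward pass
    loss_batch = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(loss_batch)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = create_train_op(loss, params)  # from optimizers.py
        if params["use_tpu"]:
            return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # EVAL: TPU metrics use the (metric_fn, tensors) form and must keep the
    # batch dimension; the CPU path passes eval_metric_ops directly.
    if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode, loss=loss, eval_metrics=(perplexity_metric, {"loss": loss_batch}))
    return tf.estimator.EstimatorSpec(
        mode, loss=loss, eval_metric_ops=perplexity_metric(loss_batch))
```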

models/gpt2/sample.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -35,10 +35,10 @@ def step(params, tokens, past=None):
         lm_output = gpt2.model(params=params, X=tokens, past=past, reuse=tf.AUTO_REUSE)

         lm_output["logits"] = tf.cast(lm_output["logits"], tf.float32)
-
+
     else:
         lm_output = lm_output = gpt2.model(params=params, X=tokens, past=past, reuse=tf.AUTO_REUSE)
-
+

     logits = lm_output['logits'][:, :, :params["n_vocab"]]
     presents = lm_output['present']
@@ -65,7 +65,7 @@ def body(past, prev, output):

     def cond(*args):
         return True
-
+
     _, _, tokens = tf.while_loop(
         cond=cond, body=body,
         maximum_iterations=length,
@@ -82,4 +82,4 @@ def cond(*args):
         back_prop=False,
     )

-    return tokens
+    return tokens
```
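The sampling loop above runs a tf.while_loop whose cond is always true, so termination comes entirely from maximum_iterations. A self-contained sketch of that pattern in TF 1.x; `step_fn` is a hypothetical stand-in for the repo's step()/body() pair, and no past-state cache is threaded through:

```python
import tensorflow as tf

def sample_tokens(step_fn, context, length):
    def body(prev, output):
        logits = step_fn(prev)  # [batch, n_vocab] logits for the last position
        next_token = tf.random.categorical(logits, num_samples=1)  # draw one id
        next_token = tf.cast(next_token, output.dtype)
        return [next_token, tf.concat([output, next_token], axis=1)]

    def cond(*args):
        return True  # bounded by maximum_iterations, not by cond

    _, tokens = tf.while_loop(
        cond=cond, body=body,
        maximum_iterations=length,
        loop_vars=[context[:, -1:], context],
        shape_invariants=[tf.TensorShape([None, 1]),
                          tf.TensorShape([None, None])],
        back_prop=False,  # sampling needs no gradients
    )
    return tokens
```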

optimizers.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -5,7 +5,7 @@
 def create_train_op(loss, params):
     lr = params["lr"]
     if "warmup_steps" in params.keys():
-        lr = cosine_decay_with_warmup(tf.train.get_global_step(), lr,
+        lr = cosine_decay_with_warmup(tf.train.get_global_step(), lr,
                                       params["max_steps"], warmup_steps=params["warmup_steps"])

     if params["opt_name"] == "adam":
@@ -51,7 +51,7 @@ def create_train_op(loss, params):

     else:
         raise ValueError("Unknown optimizer type!")
-
+
     if params["use_tpu"]:
         optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

@@ -352,4 +352,4 @@ def cast_like(x, y):
         pass
     tf.logging.warning("Cast for %s may induce copy from '%s' to '%s'", x_name,
                        x.device, cast_x.device)
-    return cast_x
+    return cast_x
```
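The first hunk wires a warmup-plus-cosine schedule into create_train_op. A minimal sketch of that schedule in plain Python so the shape of the curve is easy to check; the signature mirrors the call site, but the body is an assumed implementation, not the repo's:

```python
import math

def cosine_decay_with_warmup(global_step, lr, max_steps, warmup_steps=0):
    if global_step < warmup_steps:
        # Linear ramp from 0 up to the base learning rate
        return lr * global_step / max(1, warmup_steps)
    # Cosine decay from lr down to 0 over the remaining steps
    progress = (global_step - warmup_steps) / max(1, max_steps - warmup_steps)
    return lr * 0.5 * (1.0 + math.cos(math.pi * progress))
```

For example, with lr=1e-4, warmup_steps=1000, and max_steps=100000, step 500 yields 5e-5 and the rate decays to roughly 0 by step 100000.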
