Skip to content

Commit

Permalink
cli: error handling for tokens
Browse files: browse the repository at this point in the history
Signed-off-by: Alexander Bezzubov <[email protected]>
  • Loading branch information
bzz committed Feb 3, 2020
1 parent 96ceb0d commit 63ee209
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions notebooks/codesearchnet-opennmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,13 @@ def __getitem__(self, idx: int) -> Tuple[str, str]:

tokens = row["code_tokens"]
body_tokens = tokens[tokens.index(fn_name) + 2 :]
fn_body_tokens = body_tokens[body_tokens.index("{") + 1 : len(body_tokens) - 1]
try:
    # Keep the tokens after the first "{", dropping the final token.
    fn_body_tokens = body_tokens[
        body_tokens.index("{") + 1 : len(body_tokens) - 1
    ]
except ValueError as ve:  # '{' might be missing
    # One placeholder per argument: with a single '%s' and two arguments the
    # logging module cannot format the record and reports a formatting
    # error on stderr instead of emitting this message.
    logging.error("'%s' fn body extraction failed: %s", body_tokens, ve)
    # None marks a failed extraction.
    fn_body_tokens = None

return (fn_name, fn_body, fn_body_tokens)

Expand All @@ -91,6 +97,7 @@ def __len__(self) -> int:
from functools import lru_cache
from typing import List


def split_camelcase(camel_case_identifier: str) -> List[str]:
"""
Split camelCase identifiers.
Expand Down Expand Up @@ -158,7 +165,13 @@ def main(args: Namespace) -> None:
for fn_name, fn_body, fn_body_tokens in dataset:
if not fn_name or not fn_body:
continue
src = " ".join(fn_body_tokens) if args.token_level_sources else fn_body

if args.token_level_sources:
if not fn_body_tokens:
continue
src = " ".join(fn_body_tokens)
else:
src = fn_body

if args.word_level_targets:
tgt = fn_name
Expand Down

0 comments on commit 63ee209

Please sign in to comment.