pfnet · take-cheeze · Jan 4, 2022 · Jan 7, 2022 · Jan 11, 2022 · Jan 14, 2022
@@ -3,7 +3,7 @@
 TARGET="${1}"
 
 TEST_PIP_PACKAGES="
-matplotlib tensorboard ipython ipywidgets pandas optuna onnxruntime
+matplotlib tensorboard ipython ipywidgets pandas optuna onnxruntime marko
 pytest flake8 pysen[lint] pytest-cov slack_sdk
 "
 

@@ -58,7 +58,7 @@ if ($test -eq "torch19") {
 RunOrDie python -V
 
 # Install common requirements
-RunOrDie python -m pip install pytorch-ignite pytest flake8 matplotlib tensorboard onnx ipython ipywidgets pandas optuna cupy-cuda102 onnxruntime slack_sdk
+RunOrDie python -m pip install pytorch-ignite pytest flake8 matplotlib tensorboard onnx ipython ipywidgets pandas optuna cupy-cuda102 onnxruntime slack_sdk marko
 RunOrDie python -m pip list
 
 # Install

diff --git a/.github/workflows/test-cpu.yml b/.github/workflows/test-cpu.yml
@@ -24,7 +24,7 @@ jobs:
         pip install -U pip wheel
         pip install 'torch==1.9.*' 'torchvision==0.10.*' --extra-index-url https://download.pytorch.org/whl/cpu
         pip install pytest
-        pip install matplotlib tensorboard ipython ipywidgets pandas optuna onnx onnxruntime pytorch-ignite
+        pip install matplotlib tensorboard ipython ipywidgets pandas optuna onnx onnxruntime pytorch-ignite marko
         pip install -v -e .
         # Test PPE is importable with minimum dependency
         python -c 'import pytorch_pfn_extras'

@@ -110,10 +110,20 @@ def _to_tuple_if_not_sequence(v: Any) -> Tuple:
 
 
 def onnx_node_doc_string(onnx_node: torch._C.Node, torch_node: torch._C.Node) -> str:
+    inputs: List[torch._C.Value] = list(torch_node.inputs())
+    nodes: List[torch._C.Node] = [torch_node]
+    while len(inputs) > 0:
+        n = inputs.pop().node()
+        if n is not None and n.kind() in ["onnx::Constant", "prim::Constant", "prim::ListConstruct", "onnx::SequenceConstruct"]:
+            nodes.insert(0, n)
+            inputs = list(n.inputs()) + inputs
+    nodes_str: str = "".join([repr(n) for n in nodes])
     return f"""## Symbolic node
 {onnx_node}
 ## Original node
-{torch_node}
+```
+{nodes_str}
+```
 ## Scope
 {torch_node.scopeName()}
 ## Source Range
@@ -259,7 +269,7 @@ def _run_trace(self) -> None:
         self.g = self.optimize_torch(self.g)
         self.log("Optimized graph", self.g)
 
-        self.log("Original traced graph", self.traced.graph)
+        self.log("Original traced graph", self.traced.inlined_graph)
         self.log("State dict", lambda: "\n".join([f"- {k}: {v}" for k, v in self.vars.items()]))
 
     def is_self(self, v: torch._C.Value) -> bool:
@@ -334,6 +344,9 @@ def optimize_torch(self, graph: torch._C.Graph) -> torch._C.Graph:
             inputs = list(graph.inputs())
             for idx, n in enumerate(input_names):
                 inputs[idx].setDebugName(n)
+        if self.output_names is not None:
+            for name, out in zip(self.output_names, graph.outputs()):
+                out.setDebugName(name)
         torch._C._jit_pass_onnx_set_dynamic_input_shape(  # type: ignore[attr-defined]
             graph, self.dynamic_axes or {}, input_names
         )
@@ -467,7 +480,9 @@ def handle_if(self, g: torch._C.Graph, n: torch._C.Node) -> None:
         # Generated onnx node doc string should be added later since DCE isn't completed yet
         doc_str: str = f"""
 ## Original node
+```
 {n}
+```
 ## Scope
 {n.scopeName()}
 ## Source Range
@@ -583,6 +598,13 @@ def list_added_nodes() -> List[torch._C.Node]:
 
         sym_nodes: List[torch._C.Node] = list_added_nodes()
 
+        # Place an onnx::Identity node instead when no node was added
+        if len(sym_nodes) == 0:
+            sym_out = g_ctx.op("Identity", sym_outs[0])
+            assert isinstance(sym_out, torch._C.Value)
+            sym_outs = sym_out,
+            sym_nodes = [sym_out.node()]
+
         self.log(f"Converting node {n.kind()}", n)
         if len(sym_nodes) > 0:
             self.log(f"Converted node {n.kind()}", lambda: "\n".join([str(i) for i in sym_nodes]))
@@ -750,6 +772,7 @@ def block2subgraph(name: str, b: torch._C.Block, doc_string: str) -> onnx.GraphP
                     assert isinstance(self.vars[k], torch.Tensor)
                     t: torch.Tensor = cast(torch.Tensor, self.vars[k])
                     onnx_vars[_unique_id(i)] = _tensor_to_proto(t, name=k)
+                    onnx_vars[_unique_id(i)].doc_string = repr(i.node())
                     register_val_name(_unique_id(i), value_name(i), shadow=True)
                     continue
                 if _unique_id(i) not in val_tab:
@@ -810,8 +833,15 @@ def assign_onnx_values(
         return onnx_nodes, onnx_vars, val_tab
 
     def generate_onnx(self) -> onnx.ModelProto:
-        # Convert prim and aten nodes to ONNX by using symbolic functions
         self.original_g: torch._C.Graph = self.g.copy()
+
+        # Name all values to restore
+        for n in self.g.nodes():
+            for n_o in n.outputs():
+                if n_o.debugName() == str(n_o.unique()):
+                    n_o.setDebugName(f"v{n_o.unique()}")
+
+        # Convert prim and aten nodes to ONNX by using symbolic functions
         target_nodes = list(self.g.nodes())
         for n in target_nodes:
             self.generate_onnx_node(self.g, n)

@@ -0,0 +1,106 @@
+import marko
+import onnx
+import torch
+import re
+
+from collections import OrderedDict
+from typing import List, Set, Tuple
+
+import pytorch_pfn_extras.onnx.unstrip_tensor
+
+
+# Splits an IR line into (text before ", scope: ", following run of non-space chars).
+_scope_re = re.compile("(.+), scope: ([^ ]+)")
+# "value= <digits, minus signs and spaces> [ <Word>Type{<digits>} ]"; the value run is captured.
+_const_vals_re = re.compile(r"value= ([\d\- ]+) \[ \w+Type\{\d+\} \]")
+# "value=[ <Word>Type{<number>} ]"; the number inside the braces is captured.
+_const_typed_val_re = re.compile(r"value=\[ \w+Type\{(-?[\d\.e-]+)\} \]")
+# "value={<number>}"; the number inside the braces is captured.
+_const_val_re = re.compile(r"value=\{(-?[\d\.e-]+)\}")
+# " = ^<Name>("; _process_line rejects lines containing this and reports <Name>.
+_func_re = re.compile(r" = \^(\w+)\(")
+
+
+class ReconstructError(Exception):
+    """Raised when a TorchScript graph cannot be rebuilt from an ONNX model."""
+
+
+def _process_line(line: str) -> Tuple[str, str]:
+    """Normalize one IR line taken from a node doc string.
+
+    A ``, scope: <scope>`` annotation (and anything after it) is cut off,
+    ``onnx::Constant`` / ``onnx::SequenceConstruct`` are renamed to
+    ``prim::Constant`` / ``prim::ListConstruct``, and the ``value=`` attribute
+    of constant lines is rewritten using the module-level patterns.
+
+    Args:
+        line: One line of text from an "Original node" code block.
+
+    Returns:
+        ``(line, scope)``: the rewritten line and the last ``/``-separated
+        component of its scope (``""`` when no scope annotation is present).
+
+    Raises:
+        ReconstructError: If the rewritten line matches ``_func_re``.
+    """
+    scope = ""
+    scoped = _scope_re.match(line)
+    if scoped is not None:
+        # Keep only the innermost scope component and drop the annotation.
+        line, full_scope = scoped.group(1), scoped.group(2)
+        scope = full_scope.rsplit("/", 1)[-1]
+
+    for onnx_kind, prim_kind in (
+        ("onnx::Constant", "prim::Constant"),
+        ("onnx::SequenceConstruct", "prim::ListConstruct"),
+    ):
+        line = line.replace(onnx_kind, prim_kind)
+
+    if "prim::Constant" in line:
+        def _values_to_list(m: "re.Match[str]") -> str:
+            # Double-space separated values become a comma separated list.
+            joined = m.group(1).replace("  ", ", ")
+            return f"value=[{joined}]"
+
+        line = _const_vals_re.sub(_values_to_list, line)
+        line = _const_typed_val_re.sub(r"value=\1", line)
+        line = _const_val_re.sub(r"value=\1", line)
+
+    func = _func_re.search(line)
+    if func is not None:
+        raise ReconstructError(f"torch.autograd.Function call not supported for: {func.group(1)} in line: {line}")
+
+    return line, scope
+
+
+def _process_markdown(md: str) -> Tuple[List[str], List[str]]:
+    """Extract the IR lines stored under the "Original node" heading.
+
+    The markdown is parsed with ``marko``. Once a level-2 heading whose first
+    child's text is exactly ``Original node`` has been seen, the first fenced
+    code block that follows is consumed and parsing stops.
+
+    Args:
+        md: Markdown text (a node's ``doc_string``).
+
+    Returns:
+        ``(lines, scopes)``: parallel lists holding, for every non-empty line
+        of that code block, the two values returned by ``_process_line``.
+        Both lists are empty when no such code block is found.
+    """
+    out_lines: List[str] = []
+    out_scopes: List[str] = []
+    heading_seen: bool = False
+    document = marko.parser.Parser().parse(md)
+    for block in document.children:  # type: ignore[union-attr]
+        if heading_seen and isinstance(block, marko.block.FencedCode):
+            raw_texts = [t for t in block.children if isinstance(t, marko.inline.RawText)]
+            for raw in raw_texts:
+                for row in raw.children.split("\n"):
+                    if not row:
+                        continue
+                    processed, scope = _process_line(row)
+                    out_lines.append(processed)
+                    out_scopes.append(scope)
+            # Only the first code block after the heading is of interest.
+            break
+        if isinstance(block, marko.block.Heading) and block.level == 2:
+            if block.children[0].children == "Original node":
+                heading_seen = True
+
+    return out_lines, out_scopes
+
+
+def reconstruct(model: onnx.ModelProto) -> Tuple[torch._C.Graph, List[Tuple[str, torch.Tensor]]]:
+    """Rebuild a TorchScript graph from the doc strings of an ONNX model.
+
+    The IR lines found in every node's ``doc_string`` are collected,
+    de-duplicated in first-seen order, and assembled into a textual
+    ``graph(...)`` definition that is parsed with ``torch._C.parse_ir`` and
+    linted with ``torch._C._jit_pass_lint``.
+
+    Args:
+        model: ONNX model whose nodes carry doc strings.
+
+    Returns:
+        ``(graph, params)``: the parsed graph, and one ``(initializer name,
+        tensor)`` pair per initializer whose ``doc_string`` starts with
+        ``%<name> :`` or ``%<name> =``. The graph input added for such an
+        initializer uses the name parsed from the doc string.
+
+    Raises:
+        ReconstructError: If a node other than ``Constant`` has an empty
+            ``doc_string``.
+    """
+    ir_lines: List[str] = []
+    # NOTE(review): scopes are gathered but not used further in this function.
+    scopes: List[str] = []
+    for node in model.graph.node:
+        if node.op_type != "Constant" and len(node.doc_string) == 0:
+            raise ReconstructError(f"doc_string not found in node: {onnx.helper.printable_node(node)}. Please use strip_doc_string=False option")
+        node_lines, node_scopes = _process_markdown(node.doc_string)
+        ir_lines += node_lines
+        scopes += node_scopes
+    # Drop repeated lines while keeping the order of first appearance.
+    ir_lines = list(OrderedDict.fromkeys(ir_lines))
+
+    initializer_names: Set[str] = {init.name for init in model.graph.initializer}
+
+    inputs: List[str] = [
+        f"%{model_in.name}"
+        for model_in in model.graph.input
+        if model_in.name not in initializer_names
+    ]
+    outputs: List[str] = [f"%{model_out.name.split('.')[-1]}" for model_out in model.graph.output]
+    body = "\n    ".join(ir_lines)
+
+    initializer_name_re = re.compile(r"^%([\w.]+) [:=]")
+    params: List[Tuple[str, torch.Tensor]] = []
+    for init in model.graph.initializer:
+        named = initializer_name_re.match(init.doc_string)
+        if named is None:
+            continue
+        inputs.append("%" + named.group(1))
+
+        # Work on a copy so the tensor inside ``model`` is left untouched.
+        restored = onnx.TensorProto()
+        restored.CopyFrom(init)
+        pytorch_pfn_extras.onnx.unstrip_tensor._unstrip_tensor(restored)
+        array = onnx.numpy_helper.to_array(restored)
+        params.append((init.name, torch.from_numpy(array.copy())))
+
+    arg_list = ", ".join(inputs)
+    ret_list = ", ".join(outputs)
+    src: str = f"graph({arg_list}):\n    {body}\n    return ({ret_list})\n"
+
+    g: torch._C.Graph = torch._C.parse_ir(src)
+    torch._C._jit_pass_lint(g)
+
+    return g, params
@@ -17,7 +17,7 @@
     install_requires=['numpy', 'packaging', 'torch', 'typing-extensions>=3.10'],
     extras_require={
         'test': ['pytest', 'onnxruntime', 'torchvision'],
-        'onnx': ['onnx'],
+        'onnx': ['onnx', 'marko'],
     },
     python_requires='>=3.6.0',
     packages=setuptools.find_packages(exclude=['tests', 'tests.*']),

@@ -384,7 +384,7 @@ def _dump_upgraders_map() -> Dict[str, str]: ...
 def _test_only_populate_upgraders(content: Dict[str, str]) -> None: ...
 def _test_only_remove_upgraders(content: Dict[str, str]) -> None: ...
 def merge_type_from_type_comment(decl: Decl, type_annotation_decl: Decl, is_method: _bool) -> Decl: ...
-def parse_ir(input: str, parse_tensor_constants: _bool) -> Graph: ...
+def parse_ir(input: str, parse_tensor_constants: _bool = False) -> Graph: ...
 def parse_schema(schema: str) -> FunctionSchema: ...
 def get_device(input: Tensor) -> _int: ...
 

@@ -72,7 +72,7 @@ def forward(self, x):
 
     model = Net()
     x = torch.ones((1, 1, 32, 32))
-    output_dir = _helper(model, x, 'as_output')
+    output_dir = _helper(model, x, 'as_output', check_reconstruct=False)
 
     actual_onnx = onnx.load(os.path.join(output_dir, 'model.onnx'))
     named_nodes = {n.name: n for n in actual_onnx.graph.node}
@@ -106,7 +106,7 @@ def forward(self, x):
 
     model = Net()
     x = torch.ones((1, 1, 32, 32))
-    output_dir = _helper(model, x, 'as_output')
+    output_dir = _helper(model, x, 'as_output', check_reconstruct=False)
 
     actual_onnx = onnx.load(os.path.join(output_dir, 'model.onnx'))
     named_nodes = {n.name: n for n in actual_onnx.graph.node}
@@ -138,7 +138,7 @@ def forward(self, x):
 
     model = Net()
     x = torch.ones((1, 1, 32, 32))
-    output_dir = _helper(model, x, 'as_output')
+    output_dir = _helper(model, x, 'as_output', check_reconstruct=False)
 
     actual_onnx = onnx.load(os.path.join(output_dir, 'model.onnx'))
     named_nodes = {n.name: n for n in actual_onnx.graph.node}

@@ -58,7 +58,7 @@ def forward(self, x):
             return Func.apply(x) + torch.tensor([10], dtype=torch.float)
 
     assert hasattr(Func, "symbolic")
-    run_model_test(Model(), (torch.rand((20,)),))
+    run_model_test(Model(), (torch.rand((20,)),), check_reconstruct=False)
 
 
 class AnyModel(torch.nn.Module):
@@ -226,7 +226,8 @@ def forward(self, x):
     m = run_model_test(
         Model(), (torch.randn(2, 7, 17),),
         skip_oxrt=True,
-        custom_opsets={"org.chainer": ver})
+        custom_opsets={"org.chainer": ver},
+        check_reconstruct=False)
 
     assert len(m.opset_import) == 2
 

@@ -19,6 +19,7 @@
 import pytorch_pfn_extras.onnx._constants
 from pytorch_pfn_extras.onnx.strip_large_tensor import _strip_large_tensor_tool_impl
 from pytorch_pfn_extras.onnx.unstrip_tensor import unstrip
+from pytorch_pfn_extras.onnx.pfto_exporter.torch_reconstruct import reconstruct
 
 
 output_dir = 'out'
@@ -53,15 +54,20 @@ def _get_output_dir(d, **kwargs):
     return output_dir
 
 
-def _helper(model, args, d, use_pfto=True, **kwargs):
+def _helper(model, args, d, use_pfto=True, check_reconstruct=True, **kwargs):
+    """Export ``model`` as a test case and optionally try to reconstruct it.
+
+    Keyword arguments missing from ``kwargs`` are defaulted before the export:
+    ``training`` to ``model.training`` and ``do_constant_folding``,
+    ``metadata`` and ``strip_doc_string`` to ``False``.
+
+    ``reconstruct`` is run on the exported ``model.onnx`` only when
+    ``check_reconstruct`` and ``use_pfto`` are true and ``strip_doc_string``
+    is false.
+
+    Returns the output directory obtained from ``_get_output_dir(d)``.
+    """
     output_dir = _get_output_dir(d)
     if 'training' not in kwargs:
         kwargs['training'] = model.training
     if 'do_constant_folding' not in kwargs:
         kwargs['do_constant_folding'] = False
     if 'metadata' not in kwargs:
         kwargs["metadata"] = False
+    if "strip_doc_string" not in kwargs:
+        kwargs["strip_doc_string"] = False
     export_testcase(model, args, output_dir, use_pfto=use_pfto, **kwargs)
+    if check_reconstruct and use_pfto and not kwargs["strip_doc_string"]:
+        reconstruct(pytorch_pfn_extras.onnx.load_model(
+            os.path.join(output_dir, "model.onnx")))
     return output_dir
 
 
@@ -70,7 +76,9 @@ def test_export_testcase():
     model = Net().to('cpu')
     x = torch.zeros((1, 1, 28, 28))
 
-    output_dir = _helper(model, x, 'mnist', output_grad=True, metadata=True)
+    output_dir = _helper(
+        model, x, 'mnist', output_grad=True, metadata=True,
+        check_reconstruct=True, verbose=False)
 
     assert os.path.isdir(output_dir)
     assert os.path.isfile(os.path.join(output_dir, 'meta.json'))
@@ -157,7 +165,9 @@ def test_model_not_overwrite():
     output_dir = _helper(model, x, dir_name)
     assert os.path.isdir(output_dir)
 
-    output_dir = _helper(model, x + 0.5, dir_name, model_overwrite=False)
+    output_dir = _helper(
+        model, x + 0.5, dir_name,
+        model_overwrite=False)
 
     test_data_set_dir = os.path.join(output_dir, 'test_data_set_1')
     assert os.path.isfile(os.path.join(test_data_set_dir, 'input_0.pb'))
@@ -350,7 +360,8 @@ def test_export_testcase_options():
 
     output_dir = _helper(
         model, x, 'mnist_stripped_tensor_data',
-        opset_version=11, strip_doc_string=False)
+        opset_version=11, strip_doc_string=False,
+        check_reconstruct=False)
 
     onnx_model = onnx.load(os.path.join(
         output_dir, 'model.onnx'), load_external_data=False)

@@ -41,5 +41,5 @@ def test_shufflenet():
     run_model_test(
         torchvision.models.shufflenetv2.shufflenet_v2_x1_0(),
         (torch.rand(1, 3, 224, 224),),
-        use_gpu=True,
+        use_gpu=True, check_reconstruct=False
     )
@@ -5,6 +5,7 @@
 import onnxruntime as ort
 import torch
 from pytorch_pfn_extras.onnx.pfto_exporter.export import export as pfto_export
+from pytorch_pfn_extras.onnx.pfto_exporter.torch_reconstruct import reconstruct
 
 
 def run_model_test(
@@ -20,6 +21,7 @@ def run_model_test(
     strict_trace=True,
     mode="eval",
     use_gpu=False,
+    check_reconstruct=True,
     **kwargs,
 ) -> onnx.ModelProto:
     if mode == "train":
@@ -84,6 +86,9 @@ def run_model_test(
             assert len(te_model.graph.output) == len(pfto_model.graph.output)
             assert len(te_model.graph.input) == len(pfto_model.graph.input)
 
+        if check_reconstruct:
+            reconstruct(pfto_model)
+
         if skip_oxrt:
             return pfto_model