diff --git a/qiskit/qasm3/exporter.py b/qiskit/qasm3/exporter.py index 31f6da572b20..57e196e3cdef 100644 --- a/qiskit/qasm3/exporter.py +++ b/qiskit/qasm3/exporter.py @@ -174,12 +174,17 @@ class DefcalInstruction: } ) -# This probably isn't precisely the same as the OQ3 spec, but we'd need an extra dependency to fully -# handle all Unicode character classes, and this should be close enough for users who aren't -# actively _trying_ to break us (fingers crossed). -_VALID_DECLARABLE_IDENTIFIER = re.compile(r"([\w][\w\d]*)", flags=re.U) -_VALID_HARDWARE_QUBIT = re.compile(r"\$[\d]+", flags=re.U) -_BAD_IDENTIFIER_CHARACTERS = re.compile(r"[^\w\d]", flags=re.U) +# This is deliberately more restrictive than the OQ3 spec - the builtin `re` module has weak Unicode +# support, and this need here doesn't rise to the level of adding the third-party `regex` as a +# dependency. Python's `\w` matches way too much (basically anything in Unicode classes [L?] and +# [N?], plus _), while `\d` matches way too little (only [Nd]) to be used as a negation. As a +# compromise, we allow ASCII letters, Greek letters (since they're a small, contiguous block in +# Unicode that can be specified easily, and physicists like them), _ and [0-9]. Everything else is +# escaped. +_ALPHA = r"a-zA-Z\u0370-\u03ff" # ASCII alpha, plus the "Greek and Coptic" Unicode block. 
+_VALID_DECLARABLE_IDENTIFIER = re.compile(rf"[{_ALPHA}_][{_ALPHA}_0-9]*", flags=re.U) +_VALID_HARDWARE_QUBIT = re.compile(r"\$[0-9]+", flags=re.U) +_BAD_IDENTIFIER_CHARACTERS = re.compile(rf"[^{_ALPHA}_0-9]", flags=re.U) class Exporter: diff --git a/releasenotes/notes/qasm3-fix-identifiers-c2adba914620b486.yaml b/releasenotes/notes/qasm3-fix-identifiers-c2adba914620b486.yaml new file mode 100644 index 000000000000..d870e52fc370 --- /dev/null +++ b/releasenotes/notes/qasm3-fix-identifiers-c2adba914620b486.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The OpenQASM 3 exporter (:func:`.qasm3.dumps` and :func:`~.qasm3.dump`) will now correctly + escape Unicode "number" characters other than ASCII digits, when used in identifiers. Previously, + characters like superscripts would remain in the output, which is not valid OpenQASM 3. + - | + The OpenQASM 3 exporter (:func:`.qasm3.dumps` and :func:`~.qasm3.dump`) will now correctly + escape identifiers whose name begins with an ASCII digit. diff --git a/test/python/qasm3/test_export.py b/test/python/qasm3/test_export.py index 51c532247b56..87dc17837332 100644 --- a/test/python/qasm3/test_export.py +++ b/test/python/qasm3/test_export.py @@ -113,8 +113,11 @@ def setUpClass(cls): # be useful for the tests must _never_ have false positive matches. We use an explicit # space (`\s`) or semicolon rather than the end-of-word `\b` because we want to ensure that # the exporter isn't putting out invalid characters as part of the identifiers. + alpha = r"a-zA-Z\u0370-\u03ff" + id_first = rf"[{alpha}_]" + id_cont = rf"[{alpha}_0-9]" cls.register_regex = re.compile( - r"^\s*(let|(qu)?bit(\[\d+\])?)\s+(?P<name>\w+)[\s;]", re.U | re.M + rf"^\s*(let|(qu)?bit(\[\d+\])?)\s+(?P<name>{id_first}{id_cont}*)[\s;]", re.U | re.M ) scalar_type_names = { "angle", @@ -127,7 +130,7 @@ def setUpClass(cls): cls.scalar_parameter_regex = re.compile( r"^\s*((input|output|const)\s+)?"
# Modifier rf"({'|'.join(scalar_type_names)})\s*(\[[^\]]+\])?\s+" # Type name and designator - r"(?P<name>\w+)[\s;]", # Parameter name + rf"(?P<name>{id_first}{id_cont}*)[\s;]", # Parameter name re.U | re.M, ) super().setUpClass() @@ -1477,21 +1480,27 @@ def test_registers_have_escaped_names(self): """Test that both types of register are emitted with safely escaped names if they begin with invalid names. Regression test of gh-9658.""" qc = QuantumCircuit( - QuantumRegister(2, name="q_{reg}"), ClassicalRegister(2, name="c_{reg}") - ) - qc.measure([0, 1], [0, 1]) + QuantumRegister(2, name="q_{reg}"), + ClassicalRegister(2, name="c_{reg}"), + QuantumRegister(2, name="²"), + ClassicalRegister(2, name="2c"), + QuantumRegister(2, name="abc?!abc$%^&"), + ClassicalRegister(2, name="?!abc$%^&"), + ) + qc.measure(qc.qubits, qc.clbits) out_qasm = dumps(qc) matches = {match_["name"] for match_ in self.register_regex.finditer(out_qasm)} - self.assertEqual(len(matches), 2, msg=f"Observed OQ3 output:\n{out_qasm}") + self.assertEqual(len(matches), 6, msg=f"Observed OQ3 output:\n{out_qasm}") def test_parameters_have_escaped_names(self): """Test that parameters are emitted with safely escaped names if they begin with invalid names. Regression test of gh-9658.""" qc = QuantumCircuit(1) - qc.u(Parameter("p_{0}"), 2 * Parameter("p_?0!"), 0, 0) + qc.u(Parameter("p_{0}"), 2 * Parameter("2p"), Parameter("a²"), 0) + qc.rz(Parameter("!$abc%$&"), 0) out_qasm = dumps(qc) matches = {match_["name"] for match_ in self.scalar_parameter_regex.finditer(out_qasm)} - self.assertEqual(len(matches), 2, msg=f"Observed OQ3 output:\n{out_qasm}") + self.assertEqual(len(matches), 4, msg=f"Observed OQ3 output:\n{out_qasm}") def test_parameter_expression_after_naming_escape(self): """Test that :class:`.Parameter` instances are correctly renamed when they are used with