Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kserve-integration UAT fails in the CI on AKS and EKS with Notebook execution failed with KeyError: 'status' #1100

Open
NohaIhab opened this issue Oct 2, 2024 · 2 comments
Labels
bug Something isn't working

Comments

@NohaIhab
Copy link
Contributor

NohaIhab commented Oct 2, 2024

Bug Description

In the scheduled CI runs on AKS and EKS, the kserve-integration UAT failed.
Logs are attached in the Relevant Log Output.

To Reproduce

Run either the "Create AKS cluster, deploy CKF and run bundle test" action or the "Create EKS cluster, deploy CKF and run bundle test" action in the CI for latest/edge.

Environment

AKS 1.29
EKS 1.29
kubeflow bundle latest/edge
juju 3.4/stable

Relevant Log Output

=================================== FAILURES ===================================
______________________ test_notebook[kserve-integration] _______________________

test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'

    @pytest.mark.ipynb
    @pytest.mark.parametrize(
        # notebook - ipynb file to execute
        "test_notebook",
        NOTEBOOKS.values(),
        ids=NOTEBOOKS.keys(),
    )
    def test_notebook(test_notebook):
        """Test Notebook Generic Wrapper."""
        os.chdir(os.path.dirname(test_notebook))
    
        with open(test_notebook) as nb:
            notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
    
        ep = ExecutePreprocessor(
            timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
        )
        ep.skip_cells_with_tag = "pytest-skip"
    
        try:
            log.info(f"Running ***os.path.basename(test_notebook)***...")
>           output_notebook, _ = ep.preprocess(notebook, ***"metadata": ***"path": "./"***)

/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:45: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:103: in preprocess
    self.preprocess_cell(cell, resources, index)
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:124: in preprocess_cell
    cell = self.execute_cell(cell, index, store_history=True)
/opt/conda/lib/python3.11/site-packages/jupyter_core/utils/__init__.py:165: in wrapped
    return loop.run_until_complete(inner)
/opt/conda/lib/python3.11/asyncio/base_events.py:654: in run_until_complete
    return future.result()
/opt/conda/lib/python3.11/site-packages/nbclient/client.py:1062: in async_execute_cell
    await self._check_raise_for_error(cell, cell_index, exec_reply)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <nbconvert.preprocessors.execute.ExecutePreprocessor object at 0x7fc7cc2e6650>
cell = ***'cell_type': 'code', 'execution_count': 8, 'id': '8522c4e9-07b7-4bff-9b49-3675ff19bacc', 'metadata': ***'execution': ***'...sp = client.get(ISVC_NAME)\nisvc_url = isvc_resp[\'status\'][\'address\'][\'url\']\nprint("Inference URL:", isvc_url)'***
cell_index = 16
exec_reply = ***'buffers': [], 'content': ***'ename': 'KeyError', 'engine_info': ***'engine_id': -1, 'engine_uuid': 'd5e7bdef-d6d9-4ef2-9...e, 'engine': 'd5e7bdef-d6d9-4ef2-9700-5284518715ee', 'started': '2024-10-01T01:07:25.404728Z', 'status': 'error'***, ...***

    async def _check_raise_for_error(
        self, cell: NotebookNode, cell_index: int, exec_reply: dict[str, t.Any] | None
    ) -> None:
        if exec_reply is None:
            return None
    
        exec_reply_content = exec_reply["content"]
        if exec_reply_content["status"] != "error":
            return None
    
        cell_allows_errors = (not self.force_raise_errors) and (
            self.allow_errors
            or exec_reply_content.get("ename") in self.allow_error_names
            or "raises-exception" in cell.metadata.get("tags", [])
        )
        await run_hook(
            self.on_cell_error, cell=cell, cell_index=cell_index, execute_reply=exec_reply
        )
        if not cell_allows_errors:
>           raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
E           nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
E           ------------------
E           isvc_resp = client.get(ISVC_NAME)
E           isvc_url = isvc_resp['status']['address']['url']
E           print("Inference URL:", isvc_url)
E           ------------------
E           
E           
E           ---------------------------------------------------------------------------
E           KeyError                                  Traceback (most recent call last)
E           Cell In[8], line 2
E                 1 isvc_resp = client.get(ISVC_NAME)
E           ----> 2 isvc_url = isvc_resp['status']['address']['url']
E                 3 print("Inference URL:", isvc_url)
E           
E           KeyError: 'status'

/opt/conda/lib/python3.11/site-packages/nbclient/client.py:918: CellExecutionError

During handling of the above exception, another exception occurred:

test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'

    @pytest.mark.ipynb
    @pytest.mark.parametrize(
        # notebook - ipynb file to execute
        "test_notebook",
        NOTEBOOKS.values(),
        ids=NOTEBOOKS.keys(),
    )
    def test_notebook(test_notebook):
        """Test Notebook Generic Wrapper."""
        os.chdir(os.path.dirname(test_notebook))
    
        with open(test_notebook) as nb:
            notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
    
        ep = ExecutePreprocessor(
            timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
        )
        ep.skip_cells_with_tag = "pytest-skip"
    
        try:
            log.info(f"Running ***os.path.basename(test_notebook)***...")
            output_notebook, _ = ep.preprocess(notebook, ***"metadata": ***"path": "./"***)
            # persist the notebook output to the original file for debugging purposes
            save_notebook(output_notebook, test_notebook)
        except CellExecutionError as e:
            # handle underlying error
>           pytest.fail(f"Notebook execution failed with ***e.ename***: ***e.evalue***")
E           Failed: Notebook execution failed with KeyError: 'status'

/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:50: Failed

Additional Context

No response

@NohaIhab NohaIhab added the bug Something isn't working label Oct 2, 2024
Copy link

Thank you for reporting your feedback to us!

The internal ticket has been created: https://warthogs.atlassian.net/browse/KF-6348.

This message was autogenerated

@NohaIhab
Copy link
Contributor Author

NohaIhab commented Oct 2, 2024

It's possible that the ISVC does not have a status yet when it is checked. We can add a wait there to make sure that it has a status before getting the URL.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant