
Commit b0dd961

Move local endpoints from torchscript to custom fastapi server
1 parent: 8856fe9

32 files changed (+1737, -1195 lines)

.gitattributes

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-*.mar filter=lfs diff=lfs merge=lfs -text
+*.nc.zip filter=lfs diff=lfs merge=lfs -text
 *.stl filter=lfs diff=lfs merge=lfs -text
 *.obj filter=lfs diff=lfs merge=lfs -text
 *.dae filter=lfs diff=lfs merge=lfs -text

.gitignore

Lines changed: 1 addition & 1 deletion

@@ -108,7 +108,7 @@ ehthumbs.db
 MUJOCO_LOG.TXT

 # Local model
-*.mar
+*.nc.zip

 logs/
 examples/logs/

CHANGELOG.md

Lines changed: 5 additions & 0 deletions

@@ -15,11 +15,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `nc.list_my_orgs()` method to find the details of your organizations
 - Support for Python 3.9
 - Added Pi0
+- Ability to launch a policy outside of a server using `nc.policy()`

 ### Changed

 - The current organization is now stored locally at `~/.neuracore/config.json` rather than being set globally
 - Training now supports all data types. See CNNMLP for an example.
+- Endpoints now use our own custom server, rather than torchserve
+- `nc.connect_local_endpoint()` -> `nc.policy_local_server()`
+- `nc.connect_endpoint()` -> `nc.policy_remote_server()`
+- Moved from .mar format to .nc.zip format for model archives

 ### Removed

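For reference, the renames listed under Changed map the old calls onto the new API one-to-one. A minimal migration sketch (the training-run and endpoint names are placeholders, not values from this commit):

```python
import neuracore as nc

TRAINING_JOB_NAME = "my_training_run"  # placeholder
ENDPOINT_NAME = "my_endpoint"          # placeholder

# 1.x:
# policy = nc.connect_local_endpoint(train_run_name=TRAINING_JOB_NAME)
# policy = nc.connect_endpoint(ENDPOINT_NAME)

# 2.0:
policy = nc.policy_local_server(train_run_name=TRAINING_JOB_NAME)
remote = nc.policy_remote_server(ENDPOINT_NAME)
```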
README.md

Lines changed: 31 additions & 0 deletions

@@ -68,6 +68,37 @@ nc.log_joint_positions({
 nc.log_rgb("top_camera", image_array)
 ```

+## Command Line Commands
+
+Neuracore provides several command-line tools for authentication, organization management, and server operations:
+
+### Authentication
+```bash
+# Generate and save API key (interactive login)
+nc-login
+```
+
+### Organization Management
+```bash
+# Select current organization (interactive selection)
+nc-select-org
+```
+
+### Server Operations
+```bash
+# Launch local policy server
+nc-launch-server --job_id <job_id> --org_id <org_id> [--host <host>] [--port <port>]
+
+# Example:
+nc-launch-server --job_id my_job_123 --org_id my_org_456 --host 0.0.0.0 --port 8080
+```
+
+**Parameters:**
+- `--job_id`: Required. The job ID to run
+- `--org_id`: Required. Your organization ID
+- `--host`: Optional. Host address (default: 0.0.0.0)
+- `--port`: Optional. Port number (default: 8080)
+
 ## Open Source Training

 Neuracore includes a powerful open-source training infrastructure built with Hydra for configuration management. Train your own robot learning algorithms locally rather than using our cloud training service.

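The README section above documents the CLI; the same local FastAPI policy server can also be started from Python. A sketch using the `nc.policy_local_server()` signature added in this commit (the training-run name is a placeholder):

```python
import neuracore as nc

# Sketch: start the local policy server from Python instead of `nc-launch-server`.
# "my_training_run" is a placeholder, not a value from this commit.
nc.login()
policy = nc.policy_local_server(
    train_run_name="my_training_run",
    host="0.0.0.0",
    port=8080,
)
```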
examples/README.md

Lines changed: 0 additions & 16 deletions

@@ -80,22 +80,6 @@ For local model deployment, you'll need additional packages:
 pip install "neuracore[ml]"
 ```

-Local model deployment also requires a Java JRE.
-
-<details>
-<summary>Install Java JRE</summary>
-
-Mac:
-```bash
-brew install temurin java
-```
-
-Linux:
-```bash
-sudo apt install default-jre
-```
-</details>
-

 Run the local model:
 ```bash

examples/example_local_endpoint.py

Lines changed: 11 additions & 3 deletions

@@ -17,9 +17,17 @@ def main():
         overwrite=False,
     )
     # If you have a train run name, you can use it to connect to a local. E.g.:
-    policy = nc.connect_local_endpoint(train_run_name=TRAINING_JOB_NAME)
-    # If you know the path to the local model.mar file, you can use it directly as:
-    # policy = nc.connect_local_endpoint(path_to_model="PATH/TO/MODEL.mar")
+    policy = nc.policy(train_run_name=TRAINING_JOB_NAME)
+
+    # If you know the path to the local model.nc.zip file, you can use it directly as:
+    # policy = nc.policy(model_file=PATH/TO/MODEL.nc.zip)
+
+    # Alternatively, you can connect to a local endpoint that has been started
+    # policy = nc.policy_local_server(train_run_name=TRAINING_JOB_NAME)
+
+    # Optional. Set the checkpoint to the last epoch.
+    # Note by default, model is loaded from the last epoch.
+    policy.set_checkpoint(epoch=-1)

     onscreen_render = True
     render_cam_name = "angle"

examples/example_server_endpoint.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ def main():
     )

     try:
-        policy = nc.connect_endpoint(ENDPOINT_NAME)
+        policy = nc.policy_remote_server(ENDPOINT_NAME)
     except EndpointError:
         print(f"Please ensure that the endpoint '{ENDPOINT_NAME}' is running.")
         print(

neuracore/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -7,4 +7,4 @@
 from .api.training import * # noqa: F403
 from .core.exceptions import * # noqa: F403

-__version__ = "1.6.6"
+__version__ = "2.0.0"

neuracore/api/endpoints.py

Lines changed: 61 additions & 46 deletions

@@ -14,85 +14,100 @@

 from ..core.auth import get_auth
 from ..core.const import API_URL
-from ..core.endpoint import EndpointPolicy
-from ..core.endpoint import connect_endpoint as _connect_endpoint
-from ..core.endpoint import connect_local_endpoint as _connect_local_endpoint
+from ..core.endpoint import DirectPolicy, LocalServerPolicy, RemoteServerPolicy
+from ..core.endpoint import policy as _policy
+from ..core.endpoint import policy_local_server as _policy_local_server
+from ..core.endpoint import policy_remote_server as _policy_remote_server


-def connect_endpoint(
-    endpoint_name: str, robot_name: Optional[str] = None, instance: int = 0
-) -> EndpointPolicy:
-    """Connect to a deployed model endpoint for inference.
+def policy(
+    train_run_name: Optional[str] = None,
+    model_file: Optional[str] = None,
+    robot_name: Optional[str] = None,
+    instance: int = 0,
+) -> DirectPolicy:
+    """Launch a direct policy that runs the model in-process without any server.

-    Establishes a connection to a model endpoint that has been deployed on the
-    Neuracore platform. The endpoint can be used to make predictions with the
-    deployed model, and data logging is associated with the specified robot.
+    This is the fastest option with lowest latency since there's no network overhead.
+    The model runs directly in your Python process.

     Args:
-        endpoint_name: Name of the deployed endpoint to connect to.
+        train_run_name: Name of the training run to load the model from.
         robot_name: Robot name that predictions and data will be associated with.
             If not provided, uses the last initialized robot from global state.
         instance: Instance number of the robot for multi-instance deployments.

     Returns:
-        Policy object that provides an interface for making predictions
-        with the deployed model.
+        DirectPolicy object that provides direct in-process model inference.

     Raises:
-        EndpointError: If the endpoint connection fails due to invalid endpoint
-            name, authentication issues, or network problems.
-        ConfigError: If there is an error trying to get the current org
+        EndpointError: If the model download or initialization fails.
+        ConfigError: If there is an error trying to get the current org.
     """
-    return _connect_endpoint(
-        endpoint_name=endpoint_name, robot_name=robot_name, instance=instance
-    )
+    return _policy(train_run_name, model_file, robot_name, instance)


-def connect_local_endpoint(
-    path_to_model: Optional[str] = None,
+def policy_local_server(
     train_run_name: Optional[str] = None,
+    model_file: Optional[str] = None,
     port: int = 8080,
     robot_name: Optional[str] = None,
     instance: int = 0,
-) -> EndpointPolicy:
-    """Connect to a local model endpoint (run locally on your hardware).
+    host: str = "127.0.0.1",
+) -> LocalServerPolicy:
+    """Launch and connect to a local server policy.

-    Establishes a connection to a locally hosted model endpoint. The model can
-    be specified either by providing a direct path to a .mar model file or by
-    referencing a training run name. Only one of these options should be provided.
+    This option provides server-like architecture while maintaining local control.

     Args:
-        path_to_model: Direct file path to a local .mar (Model ARchive) model file.
-            Mutually exclusive with train_run_name.
-        train_run_name: Name of a training run to load the model from. The system
-            will locate and load the model from the specified training run.
-            Mutually exclusive with path_to_model.
-        port: TCP port number where the local endpoint is running.
+        train_run_name: Name of the training run to load the model from.
+        model_file: Path to the model file to load.
+        port: TCP port number where the local server will run.
         robot_name: Robot name that predictions and data will be associated with.
             If not provided, uses the last initialized robot from global state.
         instance: Instance number of the robot for multi-instance deployments.
+        host: Host address to bind the server to. Defaults to localhost.

     Returns:
-        Policy object that provides an interface for making predictions
-        with the local model.
+        LocalServerPolicy object that manages a local FastAPI server.

     Raises:
-        EndpointError: If the endpoint connection fails due to invalid model path,
-            inaccessible port, or conflicting parameters.
-        ValueError: If both path_to_model and train_run_name are provided, or if
-            neither is provided.
-        FileNotFoundError: If the specified model file doesn't exist.
-        ConfigError: If there is an error trying to get the current org
+        EndpointError: If the server startup or model initialization fails.
+        ConfigError: If there is an error trying to get the current org.
     """
-    return _connect_local_endpoint(
-        robot_name=robot_name,
-        instance=instance,
-        path_to_model=path_to_model,
-        train_run_name=train_run_name,
-        port=port,
+    return _policy_local_server(
+        train_run_name, model_file, port, robot_name, instance, host
     )


+def policy_remote_server(
+    endpoint_name: str,
+    robot_name: Optional[str] = None,
+    instance: int = 0,
+) -> RemoteServerPolicy:
+    """Connects to a policy that is remotely running on neuracore.
+
+    Connects to a model endpoint deployed on the Neuracore cloud platform.
+    The endpoint must be active and accessible.
+
+    Args:
+        endpoint_name: Name of the deployed endpoint to connect to.
+        robot_name: Robot name that predictions and data will be associated with.
+            If not provided, uses the last initialized robot from global state.
+        instance: Instance number of the robot for multi-instance deployments.
+
+    Returns:
+        RemoteServerPolicy object for making predictions with the remote endpoint.
+
+    Raises:
+        EndpointError: If the endpoint connection fails due to invalid endpoint
+            name, authentication issues, or network problems.
+        ConfigError: If there is an error trying to get the current org.
+    """
+    return _policy_remote_server(endpoint_name, robot_name, instance)
+
+
+# Deployment management functions
 def deploy_model(job_id: str, name: str) -> dict:
     """Deploy a trained model to a managed endpoint.

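The new `endpoints.py` API exposes three policy flavours: in-process (`DirectPolicy`), local FastAPI server (`LocalServerPolicy`), and Neuracore-hosted (`RemoteServerPolicy`). A short sketch of constructing each (run, file, and endpoint names are placeholders):

```python
import neuracore as nc

# In-process model, lowest latency (DirectPolicy).
direct = nc.policy(train_run_name="my_training_run")  # placeholder name

# Local FastAPI server on this machine (LocalServerPolicy).
local = nc.policy_local_server(
    model_file="PATH/TO/MODEL.nc.zip",  # placeholder path
    host="127.0.0.1",
    port=8080,
)

# Endpoint deployed on the Neuracore platform (RemoteServerPolicy).
remote = nc.policy_remote_server("my_endpoint")  # placeholder name
```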
neuracore/core/cli/launch_server.py

Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
+"""Command-line tool for launching a local neuracore policy server.
+
+This module provides the `nc-launch-server` entry point. It logs in to
+neuracore, selects the given organization, starts a local policy server
+for the specified job, and then waits on the server process.
+"""
+
+import argparse
+import logging
+
+import neuracore as nc
+from neuracore.core.endpoint import policy_local_server
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def main() -> None:
+    """Main entry point for the nc-launch-server command-line tool.
+
+    Parses command-line arguments, logs in to neuracore, selects the given
+    organization, and starts a local policy server for the requested job.
+    The command then blocks until the server process exits.
+
+    Usage:
+        nc-launch-server --job_id <job_id> --org_id <org_id>
+        nc-launch-server --job_id <job_id> --org_id <org_id> --host <host>
+        nc-launch-server --job_id <job_id> --org_id <org_id> --port <port>
+
+    The server binds to 0.0.0.0:8080 by default; override with --host and
+    --port.
+    """
+    parser = argparse.ArgumentParser(
+        description="Launch a local neuracore policy server",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    parser.add_argument(
+        "--job_id",
+        type=str,
+        help="Job ID to run",
+    )
+    parser.add_argument(
+        "--org_id",
+        type=str,
+        help="Organization ID",
+    )
+    parser.add_argument(
+        "--host",
+        type=str,
+        default="0.0.0.0",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8080,
+    )
+
+    args = parser.parse_args()
+
+    nc.login()
+    nc.set_organization(args.org_id)
+    policy = policy_local_server(
+        train_run_name="",  # Use job id instead
+        port=args.port,
+        host=args.host,
+        job_id=args.job_id,
+    )
+    assert policy.server_process is not None
+    policy.server_process.wait()
+
+
+if __name__ == "__main__":
+    main()
