Skip to content

Commit

Permalink
Merge branch 'main' into add-local-runtime
Browse files: browse the repository at this point in the history
  • Loading branch information
enyst authored Nov 30, 2024
2 parents a136ab1 + 4c432d3 commit af56cea
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 18 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/integration-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ jobs:
id: create_comment
uses: KeisukeYamashita/create-comment@v1
with:
# if triggered by PR, use PR number, otherwise use 5077 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5077 }}
# if triggered by PR, use PR number, otherwise use 5318 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
unique: false
comment: |
Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
Expand All @@ -155,4 +155,4 @@ jobs:
DeepSeek LLM Test Results:
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
---
Download evaluation outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
2 changes: 1 addition & 1 deletion docs/src/components/HomepageHeader/HomepageHeader.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export function HomepageHeader() {
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
<br/>
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2tom0er4l-JeNUGHt_AxpEfIBstbLPiw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
<br/>
Expand Down
11 changes: 10 additions & 1 deletion evaluation/integration_tests/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ def load_integration_tests() -> pd.DataFrame:
)

df = pd.read_json(output_file, lines=True, orient='records')

# record success and reason for failure for the final report
df['success'] = df['test_result'].apply(lambda x: x['success'])
df['reason'] = df['test_result'].apply(lambda x: x['reason'])
logger.info('-' * 100)
Expand All @@ -231,9 +233,16 @@ def load_integration_tests() -> pd.DataFrame:
)
logger.info('-' * 100)

# record cost for each instance, with 3 decimal places
df['cost'] = df['metrics'].apply(lambda x: round(x['accumulated_cost'], 3))
logger.info(f'Total cost: USD {df["cost"].sum():.2f}')

report_file = os.path.join(metadata.eval_output_dir, 'report.md')
with open(report_file, 'w') as f:
f.write(
f'Success rate: {df["success"].mean():.2%} ({df["success"].sum()}/{len(df)})\n'
)
f.write(df[['instance_id', 'success', 'reason']].to_markdown(index=False))
f.write(f'\nTotal cost: USD {df["cost"].sum():.2f}\n')
f.write(
df[['instance_id', 'success', 'reason', 'cost']].to_markdown(index=False)
)
2 changes: 1 addition & 1 deletion openhands/runtime/impl/e2b/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from glob import glob

from e2b import Sandbox as E2BSandbox
from e2b.sandbox.exception import TimeoutException
from e2b.sandbox import TimeoutException

from openhands.core.config import SandboxConfig
from openhands.core.logger import openhands_logger as logger
Expand Down
10 changes: 1 addition & 9 deletions openhands/server/socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from openhands.events.observation import (
NullObservation,
)
from openhands.events.observation.error import ErrorObservation
from openhands.events.serialization import event_to_dict
from openhands.events.stream import AsyncEventStreamWrapper
from openhands.server.auth import get_sid_from_token, sign_token
Expand Down Expand Up @@ -42,14 +41,7 @@ async def init_connection(connection_id: str, data: dict):
if token:
sid = get_sid_from_token(token, config.jwt_secret)
if sid == '':
await sio.emit(
'oh_event',
event_to_dict(
ErrorObservation(
content='Invalid token! Please ensure a valid jwt_secret is specified or use -e JWT_TOKEN when running with Docker.'
)
),
)
await sio.emit('oh_event', {'error': 'Invalid token', 'error_code': 401})
return
logger.info(f'Existing session: {sid}')
else:
Expand Down
11 changes: 9 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ numpy = "*"
json-repair = "*"
browsergym = "0.10.2" # integrate browsergym as the browsing interface
html2text = "*"
e2b = "^0.17.1"
e2b = ">=0.17.1,<1.1.0"
pexpect = "*"
jinja2 = "^3.1.3"
python-multipart = "*"
Expand Down Expand Up @@ -97,6 +97,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]


[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
Expand Down Expand Up @@ -127,6 +128,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"


[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"
Expand Down

0 comments on commit af56cea

Please sign in to comment.