Skip to content

Commit

Permalink
[OPIK-194] rest of e2e sanity tests, docstrings and architecture clea…
Browse files Browse the repository at this point in the history
…nup (#364)

* trace details test and basic dataset test

* cleanup and finishing up

* changed name of job

* removed on:push

---------

Co-authored-by: Andrei Căutișanu <[email protected]>
  • Loading branch information
AndreiCautisanu and Andrei Căutișanu authored Oct 10, 2024
1 parent f7bd89d commit 2b5846b
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sanity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
workflow_dispatch:

jobs:
test_installation:
e2e_sanity:
runs-on: ubuntu-20.04

steps:
Expand Down
58 changes: 56 additions & 2 deletions tests_end_to_end/application_sanity/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
import os
import opik
import yaml
import json
from opik.configurator.configure import configure
from opik.evaluation import evaluate
from opik.evaluation.metrics import Contains, Equals
from opik import opik_context, track, DatasetItem
from playwright.sync_api import Page

from page_objects.ProjectsPage import ProjectsPage
from page_objects.TracesPage import TracesPage
from page_objects.DatasetsPage import DatasetsPage
from page_objects.ExperimentsPage import ExperimentsPage


@pytest.fixture(scope='session', autouse=True)
Expand All @@ -30,6 +37,42 @@ def client(config):
return opik.Opik(project_name=config['project']['name'], host='http://localhost:5173/api')


@pytest.fixture(scope='function')
def projects_page(page: Page):
    """
    Provide a ProjectsPage object that has already been navigated to.

    Builds the page object around the Playwright page, calls its
    go_to_page() and hands the object to the test.
    """
    projects = ProjectsPage(page)
    projects.go_to_page()
    return projects


@pytest.fixture(scope='function')
def projects_page_timeout(page: Page):
    """
    Provide a navigated ProjectsPage after a fixed pause.

    Same construction as a plain ProjectsPage fixture, followed by a
    fixed wait_for_timeout() on the page object's underlying page before
    the object is returned.
    """
    projects = ProjectsPage(page)
    projects.go_to_page()

    # fixed delay in milliseconds; presumably gives freshly logged data time to reach the UI
    wait_ms = 7000
    projects.page.wait_for_timeout(wait_ms)
    return projects


@pytest.fixture(scope='function')
def traces_page(page: Page, projects_page, config):
    """
    Provide a TracesPage for the project named in the config.

    Clicks the project config['project']['name'] on the already opened
    projects page, then wraps the Playwright page in a TracesPage object.
    """
    project_name = config['project']['name']
    projects_page.click_project(project_name)
    return TracesPage(page)


@pytest.fixture(scope='function')
def datasets_page(page: Page):
    """
    Provide a DatasetsPage object that has already been navigated to.
    """
    datasets = DatasetsPage(page)
    datasets.go_to_page()
    return datasets


@pytest.fixture(scope='function')
def experiments_page(page: Page):
    """
    Provide an ExperimentsPage object that has already been navigated to.
    """
    experiments = ExperimentsPage(page)
    experiments.go_to_page()
    return experiments


@pytest.fixture(scope='module')
def log_traces_and_spans_low_level(client, config):
"""
Expand Down Expand Up @@ -126,7 +169,7 @@ def make_trace(x):
make_trace(x)


@pytest.fixture(scope='function')
@pytest.fixture(scope='module')
def dataset(config, client):
dataset_config = {
'name': config['dataset']['name'],
Expand All @@ -141,7 +184,7 @@ def dataset(config, client):
return dataset


@pytest.fixture(scope='function')
@pytest.fixture(scope='module')
def create_experiments(config, dataset):
exp_config = {
'prefix': config['experiments']['prefix'],
Expand Down Expand Up @@ -186,3 +229,14 @@ def eval_equals(x: DatasetItem):
scoring_metrics=[equals_metric]
)


@pytest.fixture(scope='function')
def dataset_content(config):
    """
    Load the expected dataset items from the JSON Lines file named in the config.

    The file name is read from config['dataset']['filename'] and resolved
    relative to the directory that contains this conftest file.

    Returns:
        list: one parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    curr_dir = os.path.dirname(__file__)
    dataset_filepath = os.path.join(curr_dir, config['dataset']['filename'])

    # read as UTF-8 explicitly instead of relying on the platform default encoding
    with open(dataset_filepath, 'r', encoding='utf-8') as f:
        # skip blank lines (e.g. a trailing newline at end of file), which json.loads would reject
        return [json.loads(line) for line in f if line.strip()]
126 changes: 99 additions & 27 deletions tests_end_to_end/application_sanity/test_sanity.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,31 @@
import pytest
import json

from playwright.sync_api import Page, expect
from page_objects.ProjectsPage import ProjectsPage
from page_objects.TracesPage import TracesPage

from page_objects.TracesPageSpansMenu import TracesPageSpansMenu
from page_objects.IndividualDatasetPage import IndividualDatasetPage


def test_project_name(projects_page_timeout, log_traces_and_spans_decorator, log_traces_and_spans_low_level):
    '''
    Checks that the project created via the fixtures exists
    The projects_page_timeout fixture opens the projects page and then waits a fixed delay, so the traces have time to show up in UI for later tests (TODO: figure out a better way to do this)
    1. Open projects page
    2. Check the created project exists
    '''
    projects_page_timeout.check_project_exists('test-project')


def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
#navigate to project
projects_page = ProjectsPage(page)
projects_page.go_to_page()

#wait for data to actually arrive to the frontend
#TODO: replace this with a smarter waiting mechanism
page.wait_for_timeout(5000)
projects_page.click_project(config['project']['name'])
def test_traces_created(traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
'''
Checks that every trace defined in the sanity_config file is present in the project
#grab all traces of project
traces_page = TracesPage(page)
1. Open the traces page of the project
2. Grab all the names of the traces (should never set more than 15 in config so 1 page is safe)
3. Check that every possible name of the traces as defined in sanity_config.yaml is present in the names list
'''
trace_names = traces_page.get_all_trace_names()

client_prefix = config['traces']['client']['prefix']
Expand All @@ -33,16 +36,13 @@ def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces
assert prefix+str(count) in trace_names


def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
projects_page = ProjectsPage(page)
projects_page.go_to_page()

#wait for data to actually arrive to the frontend
#TODO: replace this with a smarter waiting mechanism
projects_page.click_project(config['project']['name'])

#grab all traces of project
traces_page = TracesPage(page)
def test_spans_of_traces(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
'''
Checks that every trace has the correct number and names of spans defined in the sanity_config.yaml file
1. Open the traces page of the project
2. Go through each trace and click it
3. Check that the spans are present in each trace
'''
trace_names = traces_page.get_all_trace_names()

for trace in trace_names:
Expand All @@ -54,3 +54,75 @@ def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_trace
spans_menu.check_span_exists_by_name(f'{prefix}{count}')


def _check_feedback_scores_and_metadata(page, spans_menu, expected):
    '''
    Checks the feedback scores and metadata of the currently opened trace or span
    `expected` is the config section of that trace/span type; its 'feedback-scores' and 'metadata' mappings are read
    '''
    spans_menu.get_feedback_scores_tab().click()
    feedback_scores = expected['feedback-scores']
    for score in feedback_scores:
        # one table cell holds the score name, another its value rendered as text
        expect(page.get_by_role('cell', name=score, exact=True)).to_be_visible()
        expect(page.get_by_role('cell', name=str(feedback_scores[score]), exact=True)).to_be_visible()

    spans_menu.get_metadata_tab().click()
    metadata = expected['metadata']
    for md_key in metadata:
        # value is bound to a local first: reusing the same quote type inside an f-string
        # is a SyntaxError on Python versions before 3.12
        md_value = metadata[md_key]
        expect(page.get_by_text(f'{md_key}: {md_value}')).to_be_visible()


def test_trace_and_span_details(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
    '''
    Checks that for each trace and spans, the attributes defined in sanity_config.yaml are present
    1. Go through each trace of the project
    2. Check the created tags are present
    3. Check the created feedback scores are present
    4. Check the defined metadata is present
    5. Go through each span of the traces and repeat 3-4
    '''
    trace_names = traces_page.get_all_trace_names()

    for trace in trace_names:
        page.get_by_text(trace).click()
        spans_menu = TracesPageSpansMenu(page)

        # the part of the trace name before the first '-' selects the config section
        trace_type = trace.split('-')[0]
        trace_config = config['traces'][trace_type]

        for tag in trace_config['tags']:
            spans_menu.check_tag_exists_by_name(tag)

        _check_feedback_scores_and_metadata(page, spans_menu, trace_config)

        for count in range(config['spans']['count']):
            span_config = config['spans'][trace_type]
            prefix = span_config['prefix']
            spans_menu.get_first_span_by_name(f'{prefix}{count}').click()

            _check_feedback_scores_and_metadata(page, spans_menu, span_config)


def test_dataset_name(datasets_page, config, dataset):
    '''
    Verifies that the dataset named by config['dataset']['name'] (created through the dataset fixture) is shown on the datasets page
    '''
    expected_name = config['dataset']['name']
    datasets_page.check_dataset_exists_by_name(expected_name)


def test_dataset_items(page: Page, datasets_page, config, dataset_content):
    '''
    Verifies that the items loaded by the dataset_content fixture are present within the dataset
    1. Open the dataset named in the config from the datasets page
    2. For each item, check that a cell with its input and a cell with its expected output exist
    '''
    def as_cell_text(value):
        # serialize to JSON and pad the braces with a space ('{' -> '{ ', '}' -> ' }');
        # presumably this mirrors how the UI renders objects in table cells - TODO confirm
        return json.dumps(value).replace('{', '{ ').replace('}', ' }')

    # NOTE(review): this test does not request the `dataset` fixture itself, so it
    # appears to rely on the dataset having been created earlier - confirm
    dataset_name = config['dataset']['name']
    datasets_page.select_database_by_name(dataset_name)

    dataset_page = IndividualDatasetPage(page)
    for item in dataset_content:
        for field in ('input', 'expected_output'):
            dataset_page.check_cell_exists_by_text(as_cell_text(item[field]))


def test_experiments_exist(experiments_page, config, create_experiments):
    '''
    Verifies that the experiments created via the fixture are listed and that the expected metric text is visible for each (experiments defined in a way to always return the same results)
    '''
    # (experiment name, text expected to be visible for its metric), checked in this order
    expected_experiments = (
        ('test-experiment-Equals', 'Equals0'),
        ('test-experiment-Contains', 'Contains1'),
    )

    for experiment_name, metric_text in expected_experiments:
        experiments_page.check_experiment_exists_by_name(experiment_name)
        metric_locator = experiments_page.page.get_by_text(metric_text).first
        expect(metric_locator).to_be_visible()
17 changes: 17 additions & 0 deletions tests_end_to_end/page_objects/DatasetsPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from playwright.sync_api import Page, expect

class DatasetsPage:
    """Page object wrapping the datasets list page."""

    def __init__(self, page: Page):
        # path passed to page.goto() by go_to_page()
        self.url = '/default/datasets'
        self.page = page

    def go_to_page(self):
        """Navigate the wrapped page to the datasets URL."""
        target = self.url
        self.page.goto(target)

    def select_database_by_name(self, name):
        """Click the first element whose text is exactly *name*."""
        matches = self.page.get_by_text(name, exact=True)
        matches.first.click()

    def check_dataset_exists_by_name(self, dataset_name):
        """Assert that the element located by the text *dataset_name* is visible."""
        locator = self.page.get_by_text(dataset_name)
        expect(locator).to_be_visible()


12 changes: 12 additions & 0 deletions tests_end_to_end/page_objects/ExperimentsPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from playwright.sync_api import Page, expect

class ExperimentsPage:
    """Page object wrapping the experiments list page."""

    def __init__(self, page: Page):
        # path passed to page.goto() by go_to_page()
        self.url = '/default/experiments'
        self.page = page

    def go_to_page(self):
        """Navigate the wrapped page to the experiments URL."""
        target = self.url
        self.page.goto(target)

    def check_experiment_exists_by_name(self, name):
        """Assert that the first element located by the text *name* is visible."""
        first_match = self.page.get_by_text(name).first
        expect(first_match).to_be_visible()
9 changes: 9 additions & 0 deletions tests_end_to_end/page_objects/IndividualDatasetPage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from playwright.sync_api import Page, expect

class IndividualDatasetPage:
    """Page object wrapping the page of a single dataset."""

    def __init__(self, page: Page):
        self.page = page
        # locator for the element with the 'table' role; cell lookups are scoped to it
        table_locator = page.get_by_role('table')
        self.traces_table = table_locator

    def check_cell_exists_by_text(self, text):
        """Assert that the element located by *text* inside the table is visible."""
        cell = self.traces_table.get_by_text(text)
        expect(cell).to_be_visible()
1 change: 0 additions & 1 deletion tests_end_to_end/page_objects/TracesPage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ def __init__(self, page: Page):

def get_all_trace_names(self):
    """
    Return the inner texts of all elements matching self.trace_names_selector.

    Waits for the selector via page.wait_for_selector() before reading,
    so the lookup is not made before the elements are available.
    """
    selector = self.trace_names_selector
    self.page.wait_for_selector(selector)
    return self.page.locator(selector).all_inner_texts()
23 changes: 22 additions & 1 deletion tests_end_to_end/page_objects/TracesPageSpansMenu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,27 @@
class TracesPageSpansMenu:
    """Page object for the menu shown after a trace is clicked on the traces page."""

    def __init__(self, page: Page):
        self.page = page
        # accessible names used to locate the tabs by role
        self.input_output_tab = 'Input/Output'
        self.feedback_scores_tab = 'Feedback scores'
        self.metadata_tab = 'Metadata'

    def get_first_trace_by_name(self, name):
        """Return a locator for the first button whose accessible name matches *name*."""
        return self.page.get_by_role('button', name=name).first

    def get_first_span_by_name(self, name):
        """Return a locator for the first button whose accessible name matches *name*."""
        return self.page.get_by_role('button', name=name).first

    def check_span_exists_by_name(self, name):
        """Assert that the button whose accessible name matches *name* is visible."""
        expect(self.page.get_by_role('button', name=name)).to_be_visible()

    def check_tag_exists_by_name(self, tag_name):
        """Assert that the element located by the text *tag_name* is visible."""
        expect(self.page.get_by_text(tag_name)).to_be_visible()

    def get_input_output_tab(self):
        """Return a locator for the tab named by self.input_output_tab."""
        return self.page.get_by_role('tab', name=self.input_output_tab)

    def get_feedback_scores_tab(self):
        """Return a locator for the tab named by self.feedback_scores_tab."""
        return self.page.get_by_role('tab', name=self.feedback_scores_tab)

    def get_metadata_tab(self):
        """Return a locator for the tab named by self.metadata_tab."""
        # use the attribute like the sibling tab getters do, instead of repeating the literal
        return self.page.get_by_role('tab', name=self.metadata_tab)

0 comments on commit 2b5846b

Please sign in to comment.