Skip to content

Add Forced Alignment Client and Models (#120) #121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/rev_ai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
CaptionType, GroupChannelsType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, \
Topic, Informant, SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, \
LanguageConfidence, SentimentAnalysisResult, SentimentValue, SentimentMessage, \
SentimentAnalysisJob, CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment
SentimentAnalysisJob, CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment, \
ForcedAlignmentJob, ForcedAlignmentResult, Monologue, ElementAlignment
85 changes: 85 additions & 0 deletions src/rev_ai/forced_alignment_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
"""Client used for interacting with our forced alignment api"""

import json
from .generic_api_client import GenericApiClient
from .models.forced_alignment import ForcedAlignmentJob, ForcedAlignmentResult


class ForcedAlignmentClient(GenericApiClient):
    """Client for submitting forced alignment jobs to Rev AI and reading their results"""

    # Api version handed to GenericApiClient by the constructor
    api_version = 'v1'

    # Api name handed to GenericApiClient by the constructor
    api_name = 'alignment'

    def __init__(self, access_token):
        """Constructor

        :param access_token: access token which authorizes all requests and links them to your
                             account. Generated on the settings page of your account dashboard
                             on Rev AI.
        """
        GenericApiClient.__init__(
            self,
            access_token,
            self.api_name,
            self.api_version,
            ForcedAlignmentJob.from_json,
            ForcedAlignmentResult.from_json)

    def submit_job_url(
            self,
            source_config=None,
            source_transcript_config=None,
            transcript_text=None,
            metadata=None,
            delete_after_seconds=None,
            notification_config=None,
            language=None):
        """Submit a job to the Rev AI forced alignment api.

        The transcript must be supplied exactly one way: either as a url
        (source_transcript_config) or inline (transcript_text).

        :param source_config: CustomerUrlData object containing url of the source media and
            optional authentication headers to use when accessing the source url
        :param source_transcript_config: CustomerUrlData object containing url of the
            transcript file and optional authentication headers to use when accessing
            the transcript url
        :param transcript_text: The text of the transcript to be aligned
            (no punctuation, just words)
        :param metadata: info to associate with the alignment job
        :param delete_after_seconds: number of seconds after job completion when job
            is auto-deleted
        :param notification_config: CustomerUrlData object containing the callback url to
            invoke on job completion as a webhook and optional authentication headers to
            use when calling the callback url
        :param language: Language code for the audio and transcript. One of: "en", "es", "fr"
        :returns: ForcedAlignmentJob object
        :raises: ValueError if source_config is missing, or if the transcript is supplied
            neither way or both ways; HTTPError
        """
        if not source_config:
            raise ValueError('source_config must be provided')

        if source_transcript_config:
            if transcript_text:
                raise ValueError(
                    'Only one of source_transcript_config or transcript_text may be provided')
        elif not transcript_text:
            raise ValueError(
                'Either source_transcript_config or transcript_text must be provided')

        # source_config is known to be set here, so it is serialized unconditionally.
        job_options = {'source_config': source_config.to_dict()}
        if source_transcript_config:
            job_options['source_transcript_config'] = source_transcript_config.to_dict()
        else:
            job_options['source_transcript_config'] = None
        job_options['transcript_text'] = transcript_text
        job_options['language'] = language

        # The third positional argument is passed as None; notification_config
        # carries the callback information instead.
        payload = self._enhance_payload(
            job_options, metadata, None, delete_after_seconds, notification_config)

        return self._submit_job(payload)

    def get_result_json(self, id_):
        """Get result of a forced alignment job as json.

        :param id_: id of job to be requested
        :returns: job result data as raw json
        :raises: HTTPError
        """
        # No query parameters; the result is requested via the 'transcript' route.
        no_params = {}
        return self._get_result_json(id_, no_params, route='transcript')

    def get_result_object(self, id_):
        """Get result of a forced alignment job as ForcedAlignmentResult object.

        :param id_: id of job to be requested
        :returns: job result data as ForcedAlignmentResult object
        :raises: HTTPError
        """
        # No query parameters; the result is requested via the 'transcript' route.
        no_params = {}
        return self._get_result_object(id_, no_params, route='transcript')
8 changes: 4 additions & 4 deletions src/rev_ai/generic_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def get_list_of_jobs(self, limit=None, starting_after=None):

return [self.parse_job_info(job) for job in response.json()]

def _get_result_json(self, id_, params):
def _get_result_json(self, id_, params, route='result'):
"""Get the result of a job. This method is special in that it is intended to be hidden by
the implementation this is done because python standard is to pass options individually
instead of as an object and our true clients should match this standard
Expand All @@ -124,12 +124,12 @@ def _get_result_json(self, id_, params):

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{0}/result?{1}'.format(id_, '&'.join(query_params)))
urljoin(self.base_url, 'jobs/{0}/{1}?{2}'.format(id_, route, '&'.join(query_params)))
)

return response.json()

def _get_result_object(self, id_, params):
def _get_result_object(self, id_, params, route='result'):
"""Get the result of a job. This method is special in that it is intended to be hidden by
the implementation this is done because python standard is to pass options individually
instead of as an object and our true clients should match this standard
Expand All @@ -138,7 +138,7 @@ def _get_result_object(self, id_, params):
:returns: job result data as object
:raises: HTTPError
"""
return self.parse_job_result(self._get_result_json(id_, params))
return self.parse_job_result(self._get_result_json(id_, params, route))

def delete_job(self, id_):
"""Delete a specific job
Expand Down
1 change: 1 addition & 0 deletions src/rev_ai/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
from .language_id import LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
from .customer_url_data import CustomerUrlData
from .revaiapi_deployment_config_constants import RevAiApiDeployment, RevAiApiDeploymentConfigMap
from .forced_alignment import ForcedAlignmentJob, ForcedAlignmentResult, Monologue, ElementAlignment
6 changes: 6 additions & 0 deletions src/rev_ai/models/forced_alignment/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Module containing models for Rev AI forced alignment"""

from .forced_alignment_job import ForcedAlignmentJob
from .forced_alignment_result import ForcedAlignmentResult, Monologue, ElementAlignment

__all__ = ['ForcedAlignmentJob', 'ForcedAlignmentResult', 'Monologue', 'ElementAlignment']
67 changes: 67 additions & 0 deletions src/rev_ai/models/forced_alignment/forced_alignment_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""Contains ForcedAlignmentJob dataclass"""

from typing import Dict, Any
from ..asynchronous.job_status import JobStatus


class ForcedAlignmentJob:
    def __init__(
            self, id_, created_on, status,
            completed_on=None,
            callback_url=None,
            metadata=None,
            media_url=None,
            failure=None,
            failure_detail=None,
            processed_duration_seconds=None,
            delete_after_seconds=None):
        """Plain data holder describing a Rev AI forced alignment job

        :param id_: unique identifier for this job, stored as the ``id`` attribute
        :param created_on: date and time at which this job was created
        :param status: current job status
        :param completed_on: date and time at which this job was completed
        :param callback_url: the job's 'callback_url' field, if any
        :param metadata: customer-provided metadata
        :param media_url: URL of the media to be aligned
        :param failure: details about job failure if status is "failed"
        :param failure_detail: the job's 'failure_detail' field, if any
        :param processed_duration_seconds: the job's 'processed_duration_seconds'
            field, if any
        :param delete_after_seconds: the job's 'delete_after_seconds' field, if any
        """
        # Required fields
        self.id = id_
        self.status = status
        self.created_on = created_on

        # Optional fields, None when not supplied
        self.completed_on = completed_on
        self.metadata = metadata
        self.callback_url = callback_url
        self.media_url = media_url
        self.delete_after_seconds = delete_after_seconds
        self.processed_duration_seconds = processed_duration_seconds
        self.failure = failure
        self.failure_detail = failure_detail

    def __eq__(self, other):
        """Two jobs are equal when they share a class and all attribute values"""
        same_type = isinstance(other, self.__class__)
        return same_type and vars(self) == vars(other)

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ForcedAlignmentJob':
        """Alternate constructor used for parsing json

        'id', 'created_on' and 'status' must be present (a KeyError is raised
        otherwise); every other field falls back to None when absent.

        :param json: json dictionary to convert
        :returns: ForcedAlignmentJob
        """
        # Required keys are read first, in the same order as the constructor.
        job_id = json['id']
        created_on = json['created_on']
        status = JobStatus.from_string(json['status'])

        optional_fields = (
            'completed_on',
            'callback_url',
            'metadata',
            'media_url',
            'failure',
            'failure_detail',
            'processed_duration_seconds',
            'delete_after_seconds',
        )
        optional_values = {name: json.get(name) for name in optional_fields}

        return cls(job_id, created_on, status, **optional_values)
78 changes: 78 additions & 0 deletions src/rev_ai/models/forced_alignment/forced_alignment_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
"""Contains ForcedAlignmentResult dataclass"""

from dataclasses import dataclass
from typing import List, Dict, Any


@dataclass
class ElementAlignment:
    """Dataclass containing information about an aligned word

    :param value: the word that was aligned
    :param ts: start time of the word in seconds
    :param end_ts: end time of the word in seconds
    :param type: type of element (always "text")
    """
    value: str
    ts: float
    end_ts: float
    type: str = "text"

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ElementAlignment':
        """Creates an ElementAlignment from the given json dictionary

        Implemented as a classmethod so that subclasses get instances of
        their own type back. Missing 'value', 'ts' or 'end_ts' keys become
        None; a missing 'type' key becomes "text".

        :param json: json dictionary to convert
        :returns: ElementAlignment (or the subclass it was called on)
        """
        return cls(
            value=json.get('value'),
            ts=json.get('ts'),
            end_ts=json.get('end_ts'),
            type=json.get('type', 'text')
        )


@dataclass
class Monologue:
    """Dataclass containing information about a monologue section

    :param speaker: speaker identifier
    :param elements: list of words in this monologue with timing information
    """
    speaker: int
    elements: List[ElementAlignment]

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'Monologue':
        """Creates a Monologue from the given json dictionary

        Implemented as a classmethod so that subclasses get instances of
        their own type back. A missing 'speaker' key defaults to 0. A missing
        or null 'elements' value yields an empty list.

        :param json: json dictionary to convert
        :returns: Monologue (or the subclass it was called on)
        """
        # `or []` also covers an explicit JSON null, which a plain
        # .get('elements', []) would pass through and fail to iterate.
        raw_elements = json.get('elements') or []
        return cls(
            speaker=json.get('speaker', 0),
            elements=[ElementAlignment.from_json(element) for element in raw_elements]
        )


@dataclass
class ForcedAlignmentResult:
    """Dataclass containing the result of a forced alignment job

    :param monologues: A Monologue object per speaker containing the words
                       they spoke with timing information
    """
    monologues: List[Monologue]

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ForcedAlignmentResult':
        """Creates a ForcedAlignmentResult from the given json dictionary

        Implemented as a classmethod so that subclasses get instances of
        their own type back. A missing or null 'monologues' value yields an
        empty list.

        :param json: json dictionary to convert
        :returns: ForcedAlignmentResult (or the subclass it was called on)
        """
        # `or []` also covers an explicit JSON null, which a plain
        # .get('monologues', []) would pass through and fail to iterate.
        raw_monologues = json.get('monologues') or []
        return cls(
            monologues=[Monologue.from_json(monologue) for monologue in raw_monologues]
        )
1 change: 1 addition & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
import src.rev_ai.apiclient as client
from src.rev_ai.forced_alignment_client import ForcedAlignmentClient
Loading