Skip to content

Commit

Permalink
Merge pull request #32 from workforce-data-initiative/ksa_distinction
Browse files Browse the repository at this point in the history
Add ONET KSA distinctions to skills_master [Resolves #30]
  • Loading branch information
tweddielin committed Mar 3, 2017
2 parents 774a5d5 + 9fff23c commit b1d1b05
Show file tree
Hide file tree
Showing 14 changed files with 628 additions and 428 deletions.
15 changes: 10 additions & 5 deletions algorithms/skill_extractors/onet_ksas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(self, onet_source, output_filename, hash_function):
self.onet_source = onet_source
self.hash_function = hash_function

def onet_to_pandas(self, filename, col_name, use_relevance=True):
def onet_to_pandas(self, filename, col_name, ksa_type, use_relevance=True):
"""
Args:
filename: an unpathed filename referring to an ONET skill file
Expand Down Expand Up @@ -48,6 +48,9 @@ def onet_to_pandas(self, filename, col_name, use_relevance=True):
else:
onet = [row for row in csv.DictReader(f, delimiter='\t')]
onet = pd.DataFrame(onet)
if ksa_type:
col_name = col_name + ['ksa_type']
onet['ksa_type'] = ksa_type

for col in col_name:
onet[col] = onet[col].astype(str).str.lower()
Expand All @@ -61,18 +64,19 @@ def run(self):
nlp = NLPTransforms()
# create dataframes for each KSA type
standard_columns = ['O*NET-SOC Code', 'Element ID', 'Element Name']
skills = self.onet_to_pandas('Skills.txt', standard_columns)
ability = self.onet_to_pandas('Abilities.txt', standard_columns)
knowledge = self.onet_to_pandas('Knowledge.txt', standard_columns)
skills = self.onet_to_pandas('Skills.txt', standard_columns, 'skill')
ability = self.onet_to_pandas('Abilities.txt', standard_columns, 'ability')
knowledge = self.onet_to_pandas('Knowledge.txt', standard_columns, 'knowledge')
tools = self.onet_to_pandas(
'Tools and Technology.txt',
['O*NET-SOC Code', 'Commodity Code', 'T2 Example'],
'tool',
use_relevance=False
)

# Concat KSA dataframes into one table
# note significant duplications since it's by ONET SOC Code
new_columns = ['O*NET-SOC Code', 'Element ID', 'ONET KSA']
new_columns = ['O*NET-SOC Code', 'Element ID', 'ONET KSA', 'ksa_type']
skills.columns = new_columns
ability.columns = new_columns
knowledge.columns = new_columns
Expand All @@ -83,6 +87,7 @@ def run(self):
onet_modelreference = self.onet_to_pandas(
'Content Model Reference.txt',
['Element ID', 'Description'],
ksa_type=None,
use_relevance=False
)

Expand Down
65 changes: 65 additions & 0 deletions api_sync/v1/alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = alembic

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# max length of characters to apply to the
# "slug" field
#truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; this defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path
# version_locations = %(here)s/bar %(here)s/bat alembic/versions

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
1 change: 1 addition & 0 deletions api_sync/v1/alembic/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Generic single-database configuration.
75 changes: 75 additions & 0 deletions api_sync/v1/alembic/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import with_statement
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
from utils.db import get_apiv1_dburl
from api_sync.v1.models import Base

# This is the Alembic Config object, which provides access to the
# values within the .ini file in use.
config = context.config

# Interpret the same .ini file as a Python logging configuration;
# this call sets up the loggers.
fileConfig(config.config_file_name)

# MetaData object of the models' declarative Base, handed to Alembic
# for 'autogenerate' support.
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.

# The database URL is obtained from get_apiv1_dburl() and injected into
# the config here instead of being read from the .ini file.
db_url = str(get_apiv1_dburl())

# set_main_option() stores the value through ConfigParser, which treats
# '%' as interpolation syntax. Escape literal percent signs (e.g. from a
# URL-encoded password) so the URL is read back unchanged by
# engine_from_config(). db_url itself stays unescaped because offline
# mode passes it to context.configure() directly.
config.set_main_option('sqlalchemy.url', db_url.replace('%', '%%'))


def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with just a URL and not an Engine (though
    an Engine would be acceptable here as well). Because no Engine is
    created, a DBAPI does not even need to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    offline_options = {
        'url': db_url,
        'target_metadata': target_metadata,
        'literal_binds': True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is created from the 'sqlalchemy.'-prefixed options of the
    main config section, and a connection obtained from it is associated
    with the migration context.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )
        # Migrations run inside a transaction on the open connection.
        with context.begin_transaction():
            context.run_migrations()

# Run the migrations in whichever mode the context reports.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
23 changes: 23 additions & 0 deletions api_sync/v1/alembic/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}

def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
23 changes: 23 additions & 0 deletions api_sync/v1/alembic/versions/d523e7e45456_added_ksa_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Added ksa_type
Revision ID: d523e7e45456
Revises:
Create Date: 2017-02-27 15:21:53.018866
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'd523e7e45456'
down_revision = None
branch_labels = None
depends_on = None

def upgrade():
    """Add a String column named 'ksa_type' to the 'skills_master' table."""
    ksa_type_column = sa.Column('ksa_type', sa.String)
    op.add_column('skills_master', ksa_type_column)


def downgrade():
    """Drop the 'ksa_type' column from the 'skills_master' table."""
    table, column = 'skills_master', 'ksa_type'
    op.drop_column(table, column)
8 changes: 1 addition & 7 deletions api_sync/v1/models/skills_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,10 @@ class SkillMaster(Base):

uuid = db.Column(db.String, primary_key=True)
skill_name = db.Column(db.String)
ksa_type = db.Column(db.String)
onet_element_id = db.Column(db.String)
description = db.Column(db.String)
nlp_a = db.Column(db.String)

def __init__(self, uuid, skill_name, onet_element_id, description, nlp_a):
self.uuid = uuid
self.skill_name = skill_name
self.onet_element_id = onet_element_id
self.description = description
self.nlp_a = nlp_a

def __repr__(self):
return '<uuid {}>'.format(self.uuid)
6 changes: 3 additions & 3 deletions api_sync/v1/skills_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def load_skills_master(filename, db_engine):
reader = csv.DictReader(f, delimiter='\t')
session = sessionmaker(db_engine)()
for row in reader:
skill_master = SkillMaster(
session.merge(SkillMaster(
uuid=row['skill_uuid'],
skill_name=row['ONET KSA'],
ksa_type=row['ksa_type'],
onet_element_id=row['Element ID'],
description=row['Description'],
nlp_a=row['nlp_a']
)
session.merge(skill_master)
))
session.commit()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ Sqlalchemy
testing.postgresql
psycopg2
mock
alembic
Loading

0 comments on commit b1d1b05

Please sign in to comment.