Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Assignment 1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Use a lightweight Python image
FROM python:3.11-slim

# Set the working directory inside the container
WORKDIR /app

# Install dependencies in a single layer, before the sources are copied, so
# that editing the code does not invalidate the cached dependency layer.
# faker generates the dummy data; pytest runs the test module.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir faker pytest

# Copy the required files
COPY Q1_final_def.py Q1_w_testcase.py spec.json /app/

# Set the default command to run the main script.
# NOTE: executing the test module as a plain script only imports
# Q1_final_def (which generates the fixed-width and CSV files on import);
# it does not execute the test functions. To run the tests instead, override
# the command: docker run <image> pytest Q1_w_testcase.py
CMD ["python", "Q1_w_testcase.py"]
79 changes: 79 additions & 0 deletions Assignment 1/Q1_final_def.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

"""
@author: anam
"""

import csv
import json
from faker import Faker
import os



def parse_write_write():
    """Generate a dummy fixed-width file from spec.json and convert it to CSV.

    All paths are resolved relative to the directory containing this script:

    * ``spec.json`` (input) supplies ``ColumnNames``, ``Offsets`` (used here
      as the width of each column), ``FixedWidthEncoding``,
      ``DelimitedEncoding`` and the optional ``IncludeHeader`` flag.
    * ``dummydata.txt`` (output) receives 50 rows of Faker words, each word
      truncated or right-padded with ``_`` to its column width.
    * ``dummyparsed.csv`` (output) receives the same rows split back into
      columns, written with ``|`` as the delimiter and preceded by the
      column names when ``IncludeHeader`` is true.

    Raises:
        FileNotFoundError: if ``spec.json`` does not exist.
        KeyError: if a required key is missing from the spec.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    spec_file = os.path.join(script_dir, "spec.json")
    fixed_width_file = os.path.join(script_dir, "dummydata.txt")
    csv_output_file = os.path.join(script_dir, "dummyparsed.csv")

    # Fail with an explicit message instead of falling through to open().
    if not os.path.exists(spec_file):
        print("Spec file does not exist!")
        raise FileNotFoundError(f"Spec file not found: {spec_file}")
    print("Spec file exists!")

    # Step 1: Read spec.json
    with open(spec_file, 'r', encoding='utf-8') as f:
        spec = json.load(f)

    column_names = spec["ColumnNames"]
    column_widths = [int(offset_val) for offset_val in spec["Offsets"]]
    fixed_width_encoding = spec["FixedWidthEncoding"]
    delimited_encoding = spec["DelimitedEncoding"]
    # str() makes this work for a JSON boolean as well as for a string such
    # as "tRue"; the comparison is case-insensitive.
    header = str(spec.get("IncludeHeader", "false")).strip().lower() == "true"

    fake = Faker()

    # Step 2: Write dummy data to a fixed-width file
    with open(fixed_width_file, 'w', encoding=fixed_width_encoding) as fw_file:
        for _ in range(50):
            row = "".join(
                fake.word()[:width].ljust(width, '_') for width in column_widths
            )
            fw_file.write(row + "\n")
    print("Fixed width file generated!")

    # Step 3: Read fixed-width file and write to CSV
    with open(fixed_width_file, 'r', encoding=fixed_width_encoding) as infile, \
            open(csv_output_file, 'w', newline='',
                 encoding=delimited_encoding) as outfile:
        writer = csv.writer(outfile, delimiter='|')

        if header:
            writer.writerow(column_names)

        for line in infile:
            writer.writerow(_split_fixed_width(line, column_widths))
    print("CSV file generated using fixed width file!")


def _split_fixed_width(line, widths):
    """Slice *line* into consecutive fields of the given widths, stripping whitespace."""
    fields = []
    start = 0
    for width in widths:
        fields.append(line[start:start + width].strip())
        start += width
    return fields


def main():
    """Script entry point: generate the fixed-width file and convert it to CSV."""
    parse_write_write()


# Invoked unconditionally, so importing this module also runs the pipeline.
main()


72 changes: 72 additions & 0 deletions Assignment 1/Q1_w_testcase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: anam
"""

import os
import csv
import pytest
from Q1_final_def import parse_write_write

# Resolve every artifact relative to this test module so the tests do not
# depend on the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
spec_file, fixed_width_file, csv_output_file = (
    os.path.join(script_dir, file_name)
    for file_name in ("spec.json", "dummydata.txt", "dummyparsed.csv")
)

# Expected width of each column in the fixed-width file.
column_widths = [
    5, 12, 3, 2, 13,
    7, 10, 13, 20, 13,
]

# Expected number of rows in the CSV output.
total_rows = 51



# Test 1: Check if script runs without any errors
def test_for_errors():
    """Run the whole generate-and-parse pipeline and fail on any exception."""
    try:
        parse_write_write()
    except Exception as exc:
        # Report the underlying error text as the failure reason.
        failure_message = f"Error found: {exc}"
        pytest.fail(failure_message)


# Test 2-a: Check if fixed width file is created
def test_fixed_width_file_created():
    """Check that the fixed-width file exists on disk and is not empty."""
    file_exists = os.path.exists(fixed_width_file)
    assert file_exists, "Fixed width file was not created"

    size_in_bytes = os.path.getsize(fixed_width_file)
    assert size_in_bytes > 0, "Error: The fixed width file is empty!"

    print("Test Case 1 Passed, fixed width file is created!")


# Test 2-b: Check if CSV file is created
def test_csv_file_created():
    """Check that the CSV output exists on disk and is not empty.

    The non-empty check mirrors the fixed-width file test: an empty CSV
    would otherwise pass this test even though nothing was parsed.
    """
    assert os.path.exists(csv_output_file), "CSV file was not created"
    assert os.path.getsize(csv_output_file) > 0, "Error: The CSV file is empty!"
    print("Test Case 2 Passed, CSV file is created!")



# Test 3: Check row count in CSV
def test_total_row_count():
    """Check that the CSV output contains exactly ``total_rows`` rows."""
    with open(csv_output_file, 'r', encoding='utf-8') as csvfile:
        # Count rows while streaming instead of materialising them all.
        row_count = sum(1 for _ in csv.reader(csvfile, delimiter='|'))

    assert row_count == total_rows, f"Error: original file rows: {total_rows}, csv rows : {row_count}"
    print("Test Case 3 Passed,Check row count in CSV!")


# Test 4: Check column widths in generated fixed width file
def test_column_widths():
    """Check that every fixed-width line is exactly as long as all columns combined."""
    expected_length = sum(column_widths)

    # The fixed-width file is written with the spec's FixedWidthEncoding
    # (windows-1252 in spec.json); decoding it as utf-8 would raise
    # UnicodeDecodeError as soon as a non-ASCII character appears.
    with open(fixed_width_file, 'r', encoding='windows-1252') as txtfile:
        for line_number, line in enumerate(txtfile, start=1):
            # Remove only the line terminator: strip() would also discard
            # leading/trailing whitespace that belongs to the record.
            record = line.rstrip("\n")
            assert len(record) == expected_length, (
                "Generated fixed width data has incorrect column lengths "
                f"(line {line_number}: expected {expected_length}, got {len(record)})"
            )
    print("Test Case 4 Passed,Check column widths in generated fixed width file!")


# Test 5: Validation of fixed width and CSV
def test_data_validation():
    """Check that each CSV row, re-padded to the column widths, matches its source line."""
    # Read the fixed-width file with the encoding it is written in
    # (FixedWidthEncoding in spec.json), and open the CSV with newline=''
    # as the csv module requires.
    with open(fixed_width_file, 'r', encoding='windows-1252') as txtfile, \
            open(csv_output_file, 'r', newline='', encoding='utf-8') as csvfile:
        txt_rows = txtfile.readlines()
        # Skip the first CSV row; assumes the header row is present
        # (IncludeHeader enabled in spec.json).
        csv_rows = list(csv.reader(csvfile, delimiter='|'))[1:]

    # zip() stops at the shorter input, so a missing row would go unnoticed
    # without this explicit check.
    assert len(txt_rows) == len(csv_rows), (
        f"Row count mismatch: {len(txt_rows)} fixed width rows, "
        f"{len(csv_rows)} CSV data rows"
    )

    for txt_row, csv_row in zip(txt_rows, csv_rows):
        reconstructed = "".join(
            col.ljust(width)[:width] for col, width in zip(csv_row, column_widths)
        )
        assert txt_row.strip() == reconstructed.strip(), "Mismatch found between fixed width and CSV output"
    print("Test Case 5 Passed,Validation of fixed width and CSV!")

# NOTE(review): as a module-level statement this executes when the module is
# imported or run, independent of whether any test function ran or passed.
# Confirm this is intended.
print("All test cases passed!")



50 changes: 50 additions & 0 deletions Assignment 1/dummydata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
anothduring______coabecommon_______experieleader____enter________alone_______________I____________
even_leader______letlosingle_______hospitawife______five_________option______________leg__________
hundrpattern_____notsosure_________authoricompany___home_________from________________guy__________
rathecontain_____payawlarge________worker_off_______which________ground______________fear_________
knowlbig_________howdeabout________wait___fill______notice_______try_________________stop_________
well_college_____beyheresponse_____organizexist_____technology___certainly___________range________
join_reality_____he_desound________articlerise______capital______economy_____________high_________
rangeindustry____briseage__________attentigreat_____red__________information_________successful___
electparty_______forpawhose________actuallfollow____service______person______________only_________
guessclose_______sixalbuilding_____ahead__forward___family_______finish______________local________
stocklist________setsttraditional__upon___however___student______goal________________as___________
dark_every_______imaI_art__________order__seat______parent_______thousand____________become_______
cold_that________trydabreak________intervimedia_____degree_______several_____________how__________
deteraround______watustend_________matter_fine______speech_______his_________________film_________
prepathat________waipoanswer_______wall___seven_____soon_________hot_________________former_______
schooworld_______posmufear_________church_such______important____cold________________power________
officthing_______parrothan_________similarsomething_father_______rate________________type_________
receipush________attpocollege______public_since_____stop_________policy______________sort_________
shoulcapital_____polcecreate_______assume_run_______sport________staff_______________drug_________
identplant_______daumome___________resourconly______high_________page________________seem_________
closepeace_______lanspstrategy_____my_____foreign___Mr___________couple______________pressure_____
ten__recent______sigburequire______far____certainly_everything___partner_____________blue_________
democtrade_______effwemeeting______ground_wide______for__________gun_________________own__________
truthforce_______pubbrthing________purposechallenge_down_________career______________imagine______
hospidebate______impderesearch_____servicewall______peace________contain_____________other________
smallhuman_______Cononsenior_______millionimprove___today________themselves__________method_______
beat_eat_________litroguess________kind___agency____gun__________each________________fact_________
full_body________senbecommercial___electiowhite_____add__________better______________west_________
returhold________thrbualong________those__source____subject______another_____________with_________
mightbaby________guelarate_________if_____democraticother________light_______________represent____
intersong________couunmake_________anyone_food______build________conference__________give_________
teachhusband_____greberecently_____foot___itself____part_________partner_____________item_________
latereither______admpureturn_______matter_nothing___Congress_____would_______________leg__________
factoother_______flyfostory________yes____garden____another______move________________dark_________
gas__require_____hotpothough_______next___plan______thousand_____soldier_____________whom_________
securinstitution_blaglpush_________read___good______brother______property____________gas__________
weighnow_________fincaclaim________trainintrue______economic_____stay________________sure_________
patteidentify____styimpopulation___messagecell______worker_______example_____________summer_______
say__kitchen_____loolapublic_______accordiart_______station______accept______________teach________
direcanother_____witpolot__________you____argue_____pay__________crime_______________agency_______
way__term________trilestart________body___picture___address______able________________ten__________
partncandidate___intgiforce________later__half______charge_______develop_____________more_________
he___recent______theyaprepare______be_____outside___skin_________example_____________before_______
theorcontinue____youthyet__________leave__leader____others_______size________________seek_________
lose_reveal______attcofill_________at_____case______imagine______ahead_______________cut__________
laughgeneration__posthable_________month__first_____amount_______fear________________type_________
Congrme__________weepoanalysis_____requireonto______individual___town________________thing________
Repubfour________condatask_________lot____model_____realize______require_____________ability______
descrapply_______samcawhite________bank___necessary_let__________main________________box__________
free_college_____casaclook_________short__try_______wife_________poor________________commercial___
51 changes: 51 additions & 0 deletions Assignment 1/dummyparsed.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
f1|f2|f3|f4|f5|f6|f7|f8|f9|f10
anoth|during______|coa|be|common_______|experie|leader____|enter________|alone_______________|I____________
even_|leader______|let|lo|single_______|hospita|wife______|five_________|option______________|leg__________
hundr|pattern_____|not|so|sure_________|authori|company___|home_________|from________________|guy__________
rathe|contain_____|pay|aw|large________|worker_|off_______|which________|ground______________|fear_________
knowl|big_________|how|de|about________|wait___|fill______|notice_______|try_________________|stop_________
well_|college_____|bey|he|response_____|organiz|exist_____|technology___|certainly___________|range________
join_|reality_____|he_|de|sound________|article|rise______|capital______|economy_____________|high_________
range|industry____|bri|se|age__________|attenti|great_____|red__________|information_________|successful___
elect|party_______|for|pa|whose________|actuall|follow____|service______|person______________|only_________
guess|close_______|six|al|building_____|ahead__|forward___|family_______|finish______________|local________
stock|list________|set|st|traditional__|upon___|however___|student______|goal________________|as___________
dark_|every_______|ima|I_|art__________|order__|seat______|parent_______|thousand____________|become_______
cold_|that________|try|da|break________|intervi|media_____|degree_______|several_____________|how__________
deter|around______|wat|us|tend_________|matter_|fine______|speech_______|his_________________|film_________
prepa|that________|wai|po|answer_______|wall___|seven_____|soon_________|hot_________________|former_______
schoo|world_______|pos|mu|fear_________|church_|such______|important____|cold________________|power________
offic|thing_______|par|ro|than_________|similar|something_|father_______|rate________________|type_________
recei|push________|att|po|college______|public_|since_____|stop_________|policy______________|sort_________
shoul|capital_____|pol|ce|create_______|assume_|run_______|sport________|staff_______________|drug_________
ident|plant_______|dau|mo|me___________|resourc|only______|high_________|page________________|seem_________
close|peace_______|lan|sp|strategy_____|my_____|foreign___|Mr___________|couple______________|pressure_____
ten__|recent______|sig|bu|require______|far____|certainly_|everything___|partner_____________|blue_________
democ|trade_______|eff|we|meeting______|ground_|wide______|for__________|gun_________________|own__________
truth|force_______|pub|br|thing________|purpose|challenge_|down_________|career______________|imagine______
hospi|debate______|imp|de|research_____|service|wall______|peace________|contain_____________|other________
small|human_______|Con|on|senior_______|million|improve___|today________|themselves__________|method_______
beat_|eat_________|lit|ro|guess________|kind___|agency____|gun__________|each________________|fact_________
full_|body________|sen|be|commercial___|electio|white_____|add__________|better______________|west_________
retur|hold________|thr|bu|along________|those__|source____|subject______|another_____________|with_________
might|baby________|gue|la|rate_________|if_____|democratic|other________|light_______________|represent____
inter|song________|cou|un|make_________|anyone_|food______|build________|conference__________|give_________
teach|husband_____|gre|be|recently_____|foot___|itself____|part_________|partner_____________|item_________
later|either______|adm|pu|return_______|matter_|nothing___|Congress_____|would_______________|leg__________
facto|other_______|fly|fo|story________|yes____|garden____|another______|move________________|dark_________
gas__|require_____|hot|po|though_______|next___|plan______|thousand_____|soldier_____________|whom_________
secur|institution_|bla|gl|push_________|read___|good______|brother______|property____________|gas__________
weigh|now_________|fin|ca|claim________|trainin|true______|economic_____|stay________________|sure_________
patte|identify____|sty|im|population___|message|cell______|worker_______|example_____________|summer_______
say__|kitchen_____|loo|la|public_______|accordi|art_______|station______|accept______________|teach________
direc|another_____|wit|po|lot__________|you____|argue_____|pay__________|crime_______________|agency_______
way__|term________|tri|le|start________|body___|picture___|address______|able________________|ten__________
partn|candidate___|int|gi|force________|later__|half______|charge_______|develop_____________|more_________
he___|recent______|the|ya|prepare______|be_____|outside___|skin_________|example_____________|before_______
theor|continue____|you|th|yet__________|leave__|leader____|others_______|size________________|seek_________
lose_|reveal______|att|co|fill_________|at_____|case______|imagine______|ahead_______________|cut__________
laugh|generation__|pos|th|able_________|month__|first_____|amount_______|fear________________|type_________
Congr|me__________|wee|po|analysis_____|require|onto______|individual___|town________________|thing________
Repub|four________|con|da|task_________|lot____|model_____|realize______|require_____________|ability______
descr|apply_______|sam|ca|white________|bank___|necessary_|let__________|main________________|box__________
free_|college_____|cas|ac|look_________|short__|try_______|wife_________|poor________________|commercial___
10 changes: 10 additions & 0 deletions Assignment 1/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Parse fixed width file
Generate a fixed width file using the provided spec (the offsets provided in the spec file represent the length of each field).
Implement a parser that can parse the fixed width file and generate a delimited file, like CSV for example.
DO NOT use Python libraries like pandas for parsing. You can use the standard library to write out a CSV file (if you feel like it).
Language choices (Python or Scala)
Deliver source via github or bitbucket
Bonus points if you deliver a docker container (Dockerfile) that can be used to run the code (too lazy to install stuff that you might use)
Pay attention to encoding
Prerequisites:
Requires installation of faker (to generate the dummy data) and pytest (to run the tests)
29 changes: 29 additions & 0 deletions Assignment 1/spec.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"ColumnNames": [
"f1",
"f2",
"f3",
"f4",
"f5",
"f6",
"f7",
"f8",
"f9",
"f10"
],
"Offsets": [
"5",
"12",
"3",
"2",
"13",
"7",
"10",
"13",
"20",
"13"
],
"FixedWidthEncoding": "windows-1252",
"IncludeHeader": "tRue",
"DelimitedEncoding": "utf-8"
}
Loading