-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdata-validation.py
executable file
·136 lines (109 loc) · 4.13 KB
/
data-validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
""" Check how long strings are, practice parsing ingredients."""
import json
import re
# JSON fixes made to the data from the original API:
# Added TJ's to several items, increasing the abridged ingredients (e.g. 3 large eggs)
# Found and replaced all 1300 apostrophes that were slanted apostrophes
# Fixed minor bugs based on my print output
# Load the recipe records from disk.
# The encoding is stated explicitly: JSON is UTF-8 by specification, while
# open() otherwise falls back to the platform's default encoding, which can
# mis-decode or reject non-ASCII text on some systems.
with open('data/recipes.json', encoding='utf-8') as f:
    # json.load reads and parses directly from the open file object.
    recipe_data = json.load(f)

# Print the first record between banner lines as a quick visual sanity check.
# NOTE(review): raises IndexError if the file holds an empty list.
print("\n\n*****\n\nFIRST DATA POINT ONLY\n\n")
print(recipe_data[0])
print("\n\nFIRST DATA POINT ONLY\n\n*****\n\n")
# Length of the longest 'img' value across all recipes (0 when there are none).
# Found longest_img_url = 107
longest_img_url = max((len(entry['img']) for entry in recipe_data), default=0)
# print(longest_img_url)
# Length of the longest 'id' value across all recipes (0 when there are none).
# Found longest_recipe_id = 20
longest_recipe_id = max((len(entry['id']) for entry in recipe_data), default=0)
# print(longest_recipe_id)
# Length of the longest 'title' value across all recipes (0 when there are none).
# Found longest_title = 63
longest_title = max((len(entry['title']) for entry in recipe_data), default=0)
# print(longest_title)
# Length of the longest 'cookingTime' value across all recipes
# (0 when there are none).
# Found longest_cooktime = 36
longest_cooktime = max(
    (len(entry['cookingTime']) for entry in recipe_data),
    default=0,
)
# print(longest_cooktime)
# Length of the longest 'serves' value across all recipes (0 when there are none).
# Found longest_serves = 7
longest_serves = max((len(entry['serves']) for entry in recipe_data), default=0)
# print(longest_serves)
# Longest 'prepTime' value across all recipes, keeping the value itself so the
# offending text can be inspected.
# Found longest_preptime = 16
# Found bug with "whitespace Print Recipe" attached to preptime
# Fixed 11 bugs in recipes.json
longest_preptime_text = ""
for entry in recipe_data:
    candidate = entry['prepTime']
    # Replace only on a strictly longer value, so the first value of maximal
    # length is the one that is kept.
    if len(candidate) > len(longest_preptime_text):
        longest_preptime_text = candidate
# The recorded length is always the length of the kept text (0 if none).
longest_preptime = len(longest_preptime_text)
# print(longest_preptime)
# print(longest_preptime_text)
# Largest len() of any recipe's 'tagIds' value (0 when there are no recipes).
# Found longest_tagids = 9
longest_tagids = max((len(entry['tagIds']) for entry in recipe_data), default=0)
# print(longest_tagids)
# THIS IS WHERE I WILL PRACTICE PARSING DATA FOR INGREDIENTS
# Results noted from an earlier run:
#   longest_ingredient_length = 84
#   longest_detailed_ingredient_name = 197
abridged_ingredients_dict = {}
detailed_ingredients_dict = {}
longest_ingredient_name = 0
longest_detailed_ingredient_name = 0
for recipe in recipe_data:
    recipe_id = recipe['id']
    for detailed_ingredient in recipe['ingredients']:
        # Detailed dictionary: recipe id -> every ingredient entry, unmodified.
        detailed_ingredients_dict.setdefault(recipe_id, []).append(
            detailed_ingredient
        )

        # Track the longest detailed ingredient entry.
        longest_detailed_ingredient_name = max(
            longest_detailed_ingredient_name, len(detailed_ingredient)
        )

        # Split on "TJ's " or ", "; an entry containing neither separator
        # comes back as a single piece and has no abridged form.
        ingredient_split = re.split('TJ\'s |, ', detailed_ingredient)
        # print(ingredient_split)
        if len(ingredient_split) <= 1:
            continue

        # Abridged dictionary: recipe id -> the piece that immediately
        # follows the first separator.
        abridged_ingredient = ingredient_split[1]
        abridged_ingredients_dict.setdefault(recipe_id, []).append(
            abridged_ingredient
        )
        # print(ingredient_split[1])

        # Track the longest abridged ingredient.
        longest_ingredient_name = max(
            longest_ingredient_name, len(abridged_ingredient)
        )
# print(longest_detailed_ingredient_name)
# print(longest_ingredient_name)
# print(next(iter(detailed_ingredients_dict.items())))
# print(next(iter(abridged_ingredients_dict.items())))
# Load the tag records from disk.
# The encoding is stated explicitly: JSON is UTF-8 by specification, while
# open() otherwise falls back to the platform's default encoding.
with open('data/tags.json', encoding='utf-8') as g:
    # json.load reads and parses directly from the open file object.
    tag_data = json.load(g)
# Find longest tag: length of the longest 'name' value across all tags
# (0 when there are none).
longest_tag_name = max((len(entry['name']) for entry in tag_data), default=0)
#print(longest_tag_name)