|
| 1 | +import base64 |
| 2 | +from io import BytesIO |
| 3 | +from lxml import etree |
| 4 | +import requests |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +from bs4 import BeautifulSoup, Tag |
| 8 | + |
| 9 | +from cookbook.helper.HelperFunctions import validate_import_url |
| 10 | +from cookbook.helper.ingredient_parser import IngredientParser |
| 11 | +from cookbook.helper.recipe_url_import import parse_servings, parse_servings_text, parse_time, iso_duration_to_minutes |
| 12 | +from cookbook.integration.integration import Integration |
| 13 | +from cookbook.models import Ingredient, Recipe, Step, Keyword |
| 14 | +from recipe_scrapers import scrape_html |
| 15 | + |
| 16 | + |
class Gourmet(Integration):
    """Import recipes from an HTML export in which every recipe is a ``<div class="recipe">``.

    Only importing is supported; the export hooks raise ``NotImplementedError``.
    """

    # Unit words (compared lower-case) accepted after the number of a
    # prepTime/cookTime value. Any other or missing unit is read as minutes.
    _HOUR_UNITS = ('stunde', 'stunden', 'hour', 'hours')
    _DAY_UNITS = ('tag', 'tage', 'day', 'days')

    def split_recipe_file(self, file):
        """Return all ``<div class="recipe">`` elements contained in ``file``.

        ``file`` must provide ``read()`` returning bytes. The bytes are decoded
        as UTF-8 and undecodable bytes are dropped instead of raising.
        """
        markup = file.read().decode('utf-8', errors="ignore")
        return BeautifulSoup(markup, "html.parser").find_all("div", {"class": "recipe"})

    def get_ingredients_recursive(self, step, ingredients, ingredient_parser):
        """Attach the entries found below ``ingredients`` to ``step``.

        ``ingredients`` is the ``<ul class="ing">`` element to walk; any value
        that is not a ``Tag`` (for example ``None`` when the list is missing)
        is ignored. Child ``<li>`` tags become header ingredients named after
        their own direct text, and their nested ``<ul class="ing">`` is walked
        recursively. Every other child tag is handed to ``ingredient_parser``;
        entries for which a ``ValueError`` is raised are skipped.
        """
        if not isinstance(ingredients, Tag):
            return

        for entry in ingredients.children:
            if not isinstance(entry, Tag):
                continue

            if entry.name == "li":
                # Only the text directly inside the <li> forms the heading; the
                # text of the nested list belongs to the ingredients themselves.
                own_text = "".join(entry.find_all(string=True, recursive=False))
                heading = own_text.strip().rstrip(":")

                step.ingredients.add(Ingredient.objects.create(
                    is_header=True,
                    note=heading[:256],
                    original_text=heading,
                    space=self.request.space,
                ))
                self.get_ingredients_recursive(step, entry.find("ul", {"class": "ing"}), ingredient_parser)
                continue

            raw_text = entry.text.strip()
            try:
                amount, unit, food, note = ingredient_parser.parse(raw_text)
                step.ingredients.add(
                    Ingredient.objects.create(
                        food=ingredient_parser.get_food(food),
                        unit=ingredient_parser.get_unit(unit),
                        amount=amount,
                        note=note,
                        original_text=raw_text,
                        space=self.request.space,
                    )
                )
            except ValueError:
                # Best effort: an entry that cannot be parsed is left out.
                continue

    def get_recipe_from_file(self, file):
        """Create and return a ``Recipe`` from one recipe element.

        ``file`` is the BeautifulSoup element of a single recipe, as returned
        by ``split_recipe_file``. Missing optional parts (yield, times,
        ingredients, instructions, notes, description, image) are skipped.

        Raises:
            AttributeError: if the title element
                (``<p class="title">`` containing ``<span itemprop="name">``)
                is missing.
        """
        # The first link that carries an href is used as the source url.
        source_url = next((link.get("href") for link in file.find_all('a') if link.has_attr('href')), None)

        name = file.find("p", {"class": "title"}).find("span", {"itemprop": "name"}).text.strip()

        recipe = Recipe.objects.create(
            name=name[:128],
            source_url=source_url,
            created_by=self.request.user,
            internal=True,
            space=self.request.space,
        )

        self._add_keywords(recipe, file)

        try:
            recipe.servings = parse_servings(file.find("span", {"itemprop": "recipeYield"}).text.strip())
        except AttributeError:
            pass

        # NOTE(review): prepTime is stored as waiting_time and cookTime as
        # working_time, exactly as before - confirm this mapping is intended.
        prep_minutes = self._read_minutes(file, "prepTime")
        if prep_minutes is not None:
            recipe.waiting_time = prep_minutes

        cook_minutes = self._read_minutes(file, "cookTime")
        if cook_minutes is not None:
            recipe.working_time = cook_minutes

        step = self._new_step()

        ingredient_parser = IngredientParser(self.request, True)
        self.get_ingredients_recursive(step, file.find("ul", {"class": "ing"}), ingredient_parser)

        instructions = file.find("div", {"class": "instructions"})
        if isinstance(instructions, Tag):
            for section in instructions.children:
                if not isinstance(section, Tag) or section.text == "":
                    continue

                if section.name == "h3":
                    # A heading closes the current step once it has text and
                    # names the step that the following text goes into.
                    if step.instruction:
                        step.save()
                        recipe.steps.add(step)
                        step = self._new_step()
                    step.name = section.text.strip()[:128]
                elif section.name == "div":
                    for paragraph in section.children:
                        text = self._text_with_linebreaks(paragraph)
                        if text:
                            step.instruction += text + ' \n\n'

        notes = file.find("div", {"class": "modifications"})
        if isinstance(notes, Tag):
            for note in notes.children:
                if isinstance(note, Tag) and note.name == "h3":
                    heading = note.text.strip()
                    if heading:
                        step.instruction += f'*{heading}:* \n\n'
                    continue

                text = self._text_with_linebreaks(note)
                if text:
                    step.instruction += '*' + text + '* \n\n'

        description_tag = file.find("div", {"id": "description"})
        description = description_tag.text.strip() if description_tag is not None else ''
        if len(description) <= 512:
            recipe.description = description
        else:
            # Too long for the description field: store a shortened version
            # and append the complete text to the last step.
            recipe.description = description[:480] + ' ... (full description below)'
            step.instruction += '*Description:* \n\n*' + description + '* \n\n'

        step.save()
        recipe.steps.add(step)

        self._import_image(recipe, file)

        recipe.save()
        return recipe

    def get_files_from_recipes(self, recipes, el, cookie):
        """Exporting is not supported by this integration."""
        raise NotImplementedError('Method not implemented in storage integration')

    def get_file_from_recipe(self, recipe):
        """Exporting is not supported by this integration."""
        raise NotImplementedError('Method not implemented in storage integration')

    def _new_step(self):
        """Create an empty step that follows the user's ingredient table preference."""
        return Step.objects.create(
            instruction='',
            space=self.request.space,
            show_ingredients_table=self.request.user.userpreference.show_step_ingredients,
        )

    def _add_keywords(self, recipe, file):
        """Add every recipeCategory and recipeCuisine span of ``file`` as keyword of ``recipe``."""
        for itemprop in ("recipeCategory", "recipeCuisine"):
            for tag in file.find_all("span", {"itemprop": itemprop}):
                # get_or_create returns an (object, created) tuple
                keyword, _created = Keyword.objects.get_or_create(name=tag.text, space=self.request.space)
                recipe.keywords.add(keyword)

    def _read_minutes(self, file, itemprop):
        """Return the duration of the ``<span itemprop=...>`` in minutes, or ``None`` if absent or unreadable."""
        tag = file.find("span", {"itemprop": itemprop})
        if tag is None:
            return None
        return self._parse_duration_minutes(tag.text)

    @classmethod
    def _parse_duration_minutes(cls, text):
        """Convert a text such as ``"1,5 Stunden"`` or ``"30 min"`` into whole minutes.

        Only the first number and the word after it are evaluated. A decimal
        comma is accepted. Returns ``None`` when there is no readable number.
        """
        tokens = text.strip().split()
        if not tokens:
            return None

        unit = tokens[1].lower() if len(tokens) > 1 else ''
        if unit in cls._HOUR_UNITS:
            factor = 60
        elif unit in cls._DAY_UNITS:
            factor = 60 * 24
        else:
            factor = 1

        try:
            return int(float(tokens[0].replace(',', '.')) * factor)
        except (ValueError, OverflowError):
            # not a number (or nan/inf): treat the duration as unknown
            return None

    @staticmethod
    def _text_with_linebreaks(node):
        """Return the stripped text of ``node`` with ``<br>`` tags turned into newlines.

        ``node`` may also be a plain text node, which has no ``find_all``;
        it is converted with ``str()``.
        """
        if isinstance(node, Tag):
            for br in node.find_all("br"):
                br.replace_with("\n")
            return node.text.strip()
        return str(node).strip()

    def _import_image(self, recipe, file):
        """Attach the image referenced by the first ``<img>`` of ``file``, read from the import zip.

        Does nothing when there is no ``<img>`` or it has no ``src``. Any
        failure while reading or storing the image is printed and otherwise
        ignored, so the recipe itself is still imported.
        """
        image_tag = file.find("img")
        image_path = image_tag.get("src") if image_tag is not None else None
        if not image_path:
            return

        # Only the last path component is the file name; the src may use
        # '\' or '/' as separator.
        image_filename = image_path.replace("\\", "/").rsplit("/", 1)[-1]

        try:
            for member in self.import_zip.filelist:
                if Path(member.filename).name == image_filename:
                    image_bytes = BytesIO(self.import_zip.read(member))
                    self.import_recipe_image(recipe, image_bytes, filetype='.jpeg')
                    break
        except Exception as e:
            print(recipe.name, ': failed to import image ', str(e))
0 commit comments