From 5823a231a496adcd8922182f2b5d836ae07113f4 Mon Sep 17 00:00:00 2001
From: Y <>
Date: Sun, 27 Sep 2020 00:23:04 +0100
Subject: [PATCH] Added parser for akispetretzikis.com

---
 parsers/__init__.py        |  2 ++
 parsers/akispetretzikis.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 parsers/akispetretzikis.py

diff --git a/parsers/__init__.py b/parsers/__init__.py
index ed2cff0..39dc0bd 100644
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -13,6 +13,7 @@ from parsers.kochbar import Kochbar
 from parsers.hostthetoast import Hostthetoast
 from parsers.thewoksoflife import Thewoksoflife
 from parsers.glebekitchen import GlebeKitchen
+from parsers.akispetretzikis import AkisPetretzikis
 
 # Must exclude the "www" portion of the URL
 PARSERS = {
@@ -30,6 +31,7 @@ PARSERS = {
     'kochbar.de' : Kochbar,
     'thewoksoflife.com': Thewoksoflife,
     'glebekitchen.com': GlebeKitchen,
+    'akispetretzikis.com': AkisPetretzikis
 }
 
 def getParser(domain):
diff --git a/parsers/akispetretzikis.py b/parsers/akispetretzikis.py
new file mode 100644
index 0000000..18175ed
--- /dev/null
+++ b/parsers/akispetretzikis.py
@@ -0,0 +1,51 @@
+import json
+from re import split
+from parsers.recipe import Recipe
+
+
+class AkisPetretzikis(Recipe):
+    """Parser for recipes published on akispetretzikis.com.
+
+    Recipe data is read from the JSON-LD (``application/ld+json``)
+    script blocks embedded in the page.
+    """
+
+    def get_json_recipe(self, d):
+        """Map a JSON-LD ``Recipe`` object onto the recipe fields.
+
+        Returns an empty dict when ``d`` is not a ``Recipe`` object.
+        """
+        if str(d.get('@type', '')).lower() != 'recipe':
+            return {}
+
+        # recipeInstructions is handled as one string whose steps are
+        # separated by CRLF; empty steps are dropped.
+        steps = split(r'\r\n', d['recipeInstructions'])
+        return {
+            'name': d['name'],
+            'description': d['description'],
+            'ingredients': d['recipeIngredient'],
+            'instructions': [step for step in steps if step],
+            'image': d['image'],
+        }
+
+    def Parse(self, url):
+        """Load ``url`` via ``fetch_soup`` and return the recipe dict."""
+        recipe = {
+            'url': url,
+            'source': 'akispetretzikis.com',
+        }
+
+        soup = self.fetch_soup(url)
+
+        scripts = soup.find_all('script', {'type': 'application/ld+json'})
+        for script in scripts:
+            if not script.contents:
+                # Empty script tag: nothing to parse.
+                continue
+            d = json.loads(script.contents[0])
+            # Only JSON objects can be recipes; get_json_recipe() itself
+            # returns {} for objects of any other "@type".
+            if isinstance(d, dict):
+                recipe.update(self.get_json_recipe(d))
+        return recipe