diff --git a/.gitignore b/.gitignore
index f347387..f35a3bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,4 +128,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
-.vscode/
\ No newline at end of file
+# IDEs
+.vscode/
+.idea/
diff --git a/parsers/__init__.py b/parsers/__init__.py
index 53b01ea..3fe6c6c 100644
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -15,6 +15,7 @@ from parsers.thewoksoflife import Thewoksoflife
 from parsers.glebekitchen import GlebeKitchen
 from parsers.akispetretzikis import AkisPetretzikis
 from parsers.hervecuisine import Hervecuisine
+from parsers.fattoincasadabenedetta import Fattoincasadabenedetta
 
 # Must exclude the "www" portion of the URL
 PARSERS = {
@@ -33,7 +34,8 @@ PARSERS = {
     'thewoksoflife.com': Thewoksoflife,
     'glebekitchen.com': GlebeKitchen,
     'akispetretzikis.com': AkisPetretzikis,
-    'hervecuisine.com': Hervecuisine
+    'hervecuisine.com': Hervecuisine,
+    'fattoincasadabenedetta.it': Fattoincasadabenedetta,
 }
 
 def getParser(domain):
diff --git a/parsers/fattoincasadabenedetta.py b/parsers/fattoincasadabenedetta.py
new file mode 100644
index 0000000..9f8c128
--- /dev/null
+++ b/parsers/fattoincasadabenedetta.py
@@ -0,0 +1,73 @@
+from parsers.recipe import Recipe
+
+
+class Fattoincasadabenedetta(Recipe):
+    """Recipe parser for fattoincasadabenedetta.it."""
+
+    def parse_microformat(self, soup):
+        """Build a recipe dict from a parsed recipe page.
+
+        `soup` is the document returned by `fetch_soup`; only its
+        `find`/`find_all` interface is used (presumably BeautifulSoup).
+
+        Returns a dict with the keys 'name', 'description', 'image',
+        'ingredients' and 'instructions'.
+        """
+        recipe = {}
+
+        title = soup.find('h1', {'class': 'entry-title'})
+        recipe['name'] = title.contents[0]
+
+        result = soup.find('meta', {'property': 'og:description'})
+        recipe['description'] = result['content']
+
+        # Append every entry of the time box as an extra description line;
+        # pages without a time box simply contribute nothing.
+        time_box = soup.find('div', {'class': 'recipe-time-box'})
+        times = time_box.find_all('li') if time_box is not None else []
+        for entry in times:
+            time_element = entry.find('span', {'class': 'time-text'}).contents[0]
+            time_value = entry.find('span', {'class': ''}).contents[0]
+            recipe['description'] += "\n" + str(time_element) + ' ' + str(time_value)
+
+        result = soup.find('meta', {'property': 'og:image'})
+        recipe['image'] = result['content']
+
+        recipe['ingredients'] = []
+        ingredients = soup.find_all('li', {'class': 'wpurp-recipe-ingredient'})
+        for ingredient in ingredients:
+            quantity = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-quantity recipe-ingredient-quantity'}).string or ''
+            unit = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-unit recipe-ingredient-unit'}).string or ''
+            name_element = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-name recipe-ingredient-name'})
+            name = name_element.contents[0]
+            # A second child of the name span, when present, is used as notes.
+            notes = ''
+            if len(name_element.contents) > 1:
+                notes = name_element.contents[1].contents[0]
+
+            recipe['ingredients'].append(quantity + ' ' + unit + ' ' + name + notes)
+
+        recipe['instructions'] = []
+        instructions = soup.find_all('li', {'class': 'wpurp-recipe-instruction'})
+        for instruction in instructions:
+            # Skip steps whose first child does not expose a single string.
+            if instruction.contents[0].string is not None:
+                recipe['instructions'].append(instruction.contents[0].string)
+
+        return recipe
+
+    def Parse(self, url):
+        """Fetch `url` and return its recipe dict.
+
+        The result holds 'url' and 'source' plus every field produced by
+        `parse_microformat`.
+        """
+        recipe = {}
+        recipe['url'] = url
+        recipe['source'] = 'fattoincasadabenedetta.it'
+
+        soup = self.fetch_soup(url)
+        parsed_recipe = self.parse_microformat(soup)
+        recipe.update(parsed_recipe)
+
+        return recipe