added fattoincasadabenedetta.it

2021-02-06 21:39:46 +01:00 · 2021-02-06 21:39:46 +01:00 · d06cd2ef31
commit d06cd2ef31
parent 1440c144a5
3 changed files with 61 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -128,4 +128,6 @@ dmypy.json
 # Pyre type checker
 .pyre/

-.vscode/
+# IDEs
+.vscode/
+.idea/
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
@@ -15,6 +15,7 @@ from parsers.thewoksoflife import Thewoksoflife
 from parsers.glebekitchen import GlebeKitchen
 from parsers.akispetretzikis import AkisPetretzikis
 from parsers.hervecuisine import Hervecuisine
+from parsers.fattoincasadabenedetta import Fattoincasadabenedetta

 # Must exclude the "www" portion of the URL
 PARSERS = {
@@ -33,7 +34,8 @@ PARSERS = {
    'thewoksoflife.com': Thewoksoflife,
    'glebekitchen.com': GlebeKitchen,
    'akispetretzikis.com': AkisPetretzikis,
-    'hervecuisine.com': Hervecuisine
+    'hervecuisine.com': Hervecuisine,
+    'fattoincasadabenedetta.it': Fattoincasadabenedetta,
 }

 def getParser(domain):
--- a/parsers/fattoincasadabenedetta.py
+++ b/parsers/fattoincasadabenedetta.py
@@ -0,0 +1,103 @@
+from parsers.recipe import Recipe
+
+
+class Fattoincasadabenedetta(Recipe):
+    """Recipe parser for fattoincasadabenedetta.it pages."""
+
+    @staticmethod
+    def _span_string(parent, css_class):
+        """Return the string of the matching span under *parent*, or ''."""
+        span = parent.find('span', {'class': css_class})
+        if span is None:
+            return ''
+        return span.string or ''
+
+    def parse_microformat(self, soup):
+        """Extract the recipe fields from a parsed page.
+
+        Args:
+            soup: parsed document, queried through ``find``/``find_all``.
+
+        Returns:
+            dict with ``name``, ``description`` (Open Graph description
+            followed by one line per time entry), ``image``,
+            ``ingredients`` (list) and ``instructions`` (list).
+
+        Raises:
+            AttributeError, TypeError: when the title or one of the Open
+                Graph meta tags is missing, since those lookups are not
+                guarded.
+        """
+        recipe = {}
+
+        title = soup.find('h1', {'class': 'entry-title'})
+        recipe['name'] = title.contents[0]
+
+        result = soup.find('meta', {'property': 'og:description'})
+        recipe['description'] = result['content']
+
+        # The time box only enriches the description, so a page without it
+        # (or with an incomplete entry) keeps the plain description.
+        time_box = soup.find('div', {'class': 'recipe-time-box'})
+        times = time_box.find_all('li') if time_box is not None else []
+        for time in times:
+            label = time.find('span', {'class': 'time-text'})
+            # Presumably selects the span that carries no class -- verify.
+            value = time.find('span', {'class': ''})
+            if label is None or value is None:
+                continue
+            if not label.contents or not value.contents:
+                continue
+            entry = str(label.contents[0]) + ' ' + str(value.contents[0])
+            recipe['description'] += "\n" + entry
+
+        result = soup.find('meta', {'property': 'og:image'})
+        recipe['image'] = result['content']
+
+        recipe['ingredients'] = []
+        ingredients = soup.find_all('li', {'class': 'wpurp-recipe-ingredient'})
+        for ingredient in ingredients:
+            quantity = self._span_string(
+                ingredient,
+                'wpurp-recipe-ingredient-quantity recipe-ingredient-quantity')
+            unit = self._span_string(
+                ingredient,
+                'wpurp-recipe-ingredient-unit recipe-ingredient-unit')
+            name_element = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-name recipe-ingredient-name'})
+            name = name_element.contents[0]
+            notes = ''
+            if len(name_element.contents) > 1:
+                # A second child holds the notes; its first child is used.
+                notes = name_element.contents[1].contents[0]
+
+            # Join only the non-empty parts so a missing quantity or unit
+            # does not leave stray spaces in the ingredient line.
+            parts = [quantity, unit, name + notes]
+            recipe['ingredients'].append(' '.join(p for p in parts if p))
+
+        steps = soup.find_all('li', {'class': 'wpurp-recipe-instruction'})
+        recipe['instructions'] = [
+            step.contents[0].string
+            for step in steps
+            if step.contents and step.contents[0].string is not None
+        ]
+
+        return recipe
+
+    def Parse(self, url):
+        """Fetch *url* and return its recipe as a dict.
+
+        The result carries ``url`` and ``source`` plus every key produced
+        by ``parse_microformat``.
+        """
+        recipe = {
+            'url': url,
+            'source': 'fattoincasadabenedetta.it',
+        }
+
+        # fetch_soup is not defined in this class; presumably inherited
+        # from Recipe.
+        soup = self.fetch_soup(url)
+        recipe.update(self.parse_microformat(soup))
+
+        return recipe