Skip to content

Commit

Permalink
feat: add retry mechanism around main scraper fetch method
Browse files: browse the repository at this point in the history
  • Loading branch information
Flexicon committed Dec 30, 2024
1 parent 4a2f982 commit 325d6e7
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions scraper/helpers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,14 @@
from bs4.element import Tag
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from tenacity import Retrying, RetryError, retry_if_exception_type, stop_after_attempt
from tenacity import (
Retrying,
RetryError,
retry,
retry_if_exception_type,
stop_after_attempt,
wait_exponential,
)

from common.models import Champion

Expand All @@ -22,6 +29,12 @@ def __init__(self) -> None:
self.driver.set_page_load_timeout(10)
self.driver.implicitly_wait(5)

@retry(
stop=stop_after_attempt(3),
retry=retry_if_exception_type(TimeoutException),
wait=wait_exponential(min=1, max=5),
reraise=True,
)
def fetch_content_html(self, url: str, *, selector: str = ".main") -> str:
print(f"Fetching html from: {url}")
self.driver.get(url)
Expand Down Expand Up @@ -56,14 +69,12 @@ def _scrape_traits_for_character(driver: ScraperWebDriver, character: Tag) -> li
return traits_cache[url]

try:
for attempt in Retrying(
stop=stop_after_attempt(3), retry=retry_if_exception_type(TimeoutException)
):
with attempt:
html = driver.fetch_content_html(url)
traits = traits_cache[url] = _extract_traits_from_character_html(html)
except RetryError:
traits = []
html = driver.fetch_content_html(url)
except TimeoutException:
print(f"Timeout fetching champion traits from: {url}")
return []

traits = traits_cache[url] = _extract_traits_from_character_html(html)
return traits


Expand Down

0 comments on commit 325d6e7

Please sign in to comment.