From 713ec3094eacd2043ff61e24476bc0d6ed6a0c0c Mon Sep 17 00:00:00 2001
From: Ethan White
Date: Tue, 16 Apr 2024 22:40:07 -0400
Subject: [PATCH] Use requests instead of urllib.request.urlopen

The Python website started serving webpages that were gzipped, which
broke the more basic approach. requests handles this automatically and
is the widely accepted library for this type of work.

Fixes #106
---
 offlinedatasci/main.py | 15 ++++++++-------
 pyproject.toml         |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/offlinedatasci/main.py b/offlinedatasci/main.py
index 0d0fcd3..7505f0f 100644
--- a/offlinedatasci/main.py
+++ b/offlinedatasci/main.py
@@ -11,6 +11,7 @@
 import urllib.request, urllib.error, urllib.parse
 import importlib_resources
 import pypi_mirror
+import requests
 import shutil
 import sys
 import warnings
@@ -189,8 +190,8 @@ def download_rstudio(ods_dir):
     destination_path = Path(Path(ods_dir), Path("rstudio"))
     if not os.path.isdir(destination_path):
         os.makedirs(destination_path)
-    fp = urllib.request.urlopen(baseurl)
-    web_content = fp.read()
+    fp = requests.get(baseurl)
+    web_content = fp.content
     soup = bs.BeautifulSoup(web_content, 'lxml')
     links = soup.find_all('a')
     for link in links:
@@ -214,8 +215,8 @@ def download_python(ods_dir):
     if not os.path.isdir(destination_path):
         os.makedirs(destination_path)
     python_versions = {}
-    fp = urllib.request.urlopen(url)
-    web_content = fp.read()
+    fp = requests.get(url)
+    web_content = fp.content
     soup = bs.BeautifulSoup(web_content, 'lxml')
     r_studio_download_table = soup.find_all('table')[download_table_num]
     table_body = r_studio_download_table.find('tbody')
@@ -239,7 +240,7 @@ def find_r_current_version(url):
     url -- CRAN r-project URL
     """
     version_regex = "(R\-\d+\.\d+\.\d)+\-(?:x86_64|arm64|win)\.(?:exe|pkg)"
-    urlfile = urllib.request.urlopen(url)
+    urlfile = requests.get(url).iter_lines()
     for line in urlfile:
         decoded = line.decode("utf-8")
         match = re.findall(version_regex, decoded)
@@ -297,8 +298,8 @@ def get_ods_dir(directory=Path.home()):
 def get_python_download_page():
     """Get download page from Python homepage."""
     base_url="https://www.python.org"
-    fp = urllib.request.urlopen(base_url)
-    web_content = fp.read()
+    fp = requests.get(base_url)
+    web_content = fp.content
     soup = bs.BeautifulSoup(web_content, "html.parser")
     release_a_tag = soup.find("a", href=lambda href: href and "release" in href)
     current_release_path = release_a_tag["href"]
diff --git a/pyproject.toml b/pyproject.toml
index f13f0c7..df9d3f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@
 dependencies = [
     'importlib_resources',
     'lxml',
     'python-pypi-mirror',
+    'requests',
     'setuptools'
 ]