-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnew_fx.py
54 lines (43 loc) · 1.73 KB
/
new_fx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# --- testing f(x) ---
# Import Splinter, BeautifulSoup, Pandas and datetime
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
import datetime as dt
def mars_hemi(browser=None):
    """Scrape the title and full-size image URL of each Mars hemisphere.

    Visits the hemisphere search-results page on astrogeology.usgs.gov,
    collects the text of every ``<h3>`` element as a hemisphere title, then
    opens each title's detail page and reads the ``src`` of the
    ``img.wide-image`` element and the text of the ``<h2 class="title">``.

    Args:
        browser: The splinter ``Browser`` to drive. When ``None``, a visible
            Chrome browser is launched through ``chromedriver.exe`` and is
            closed again before this function returns. A browser supplied
            by the caller is never closed here.

    Returns:
        A list of ``{'img_url': ..., 'title': ...}`` dictionaries, one per
        ``<h3>`` found on the results page, in page order.

    Raises:
        AttributeError: If a detail page has no ``img.wide-image`` or
            ``h2.title`` element.
    """
    # Only launch (and later clean up) a browser when the caller did not
    # hand one in; previously the argument was overwritten and the freshly
    # started Chrome process was never shut down.
    owns_browser = browser is None
    if owns_browser:
        executable_path = {'executable_path': 'chromedriver.exe'}
        browser = Browser('chrome', **executable_path, headless=False)

    try:
        # Visit the site with the hemisphere photos
        hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
        browser.visit(hemi_url)
        hemi_soup = soup(browser.html, "html.parser")

        # The h3 tags refer to each image title
        h3_tags = [tag.get_text() for tag in hemi_soup.find_all('h3')]

        # Initialize list of dictionaries
        img_list = []
        for item in h3_tags:
            # Go back to the results page and click the link for this title
            browser.visit(hemi_url)
            full_img_elem = browser.find_by_text(item, wait_time=1)
            full_img_elem.click()
            img_soup = soup(browser.html, 'html.parser')

            # Best effort: click the 'Open' link if the page offers one.
            # NOTE(review): the page was already parsed above, so this click
            # does not affect the extracted data - confirm it is still needed.
            try:
                if browser.is_text_present('Open', wait_time=1):
                    browser.links.find_by_partial_text('Open').click()
            except Exception:
                # A missing or non-clickable toggle must not abort the
                # scrape, but KeyboardInterrupt/SystemExit still propagate.
                pass

            # 'img.wide-image' selects the <img> tag with class "wide-image"
            img_rel_url = img_soup.select_one('img.wide-image').get('src')
            img_url = f'https://astrogeology.usgs.gov{img_rel_url}'
            img_title = img_soup.find('h2', class_='title').get_text()
            img_list.append({'img_url': img_url, 'title': img_title})

        return img_list
    finally:
        if owns_browser:
            browser.quit()