-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweb_scraping.py
36 lines (27 loc) · 1.16 KB
/
web_scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import requests
from bs4 import BeautifulSoup
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# NOTE(security): certificate verification is deliberately disabled, as in the
# original script, so that hosts with self-signed certificates can be scraped
# while testing. This permits man-in-the-middle attacks; set to True for any
# real use.
VERIFY_TLS = False


def format_tag_names(names):
    """Return *names* as a comma-separated list of double-quoted tag names.

    Duplicates are collapsed and the names are sorted so the listing shown to
    the user is stable from run to run.
    """
    return ", ".join(f'"{name}"' for name in sorted(set(names)))


def format_href_line(tag_name, href):
    """Return the ``"<tag_name>: <href>"`` output line for one tag.

    Returns ``None`` when *href* is ``None`` (the tag has no ``href``
    attribute), so the caller can skip the line instead of failing.
    """
    if href is None:
        return None
    return f"{tag_name}: {href}"


def main():
    """Fetch a page, list its tag names, and print the tags the user selects.

    Prompts for a URL, downloads it, prints every distinct tag name found in
    the HTML, then prompts for one tag name and prints the text of each
    matching tag, followed by its ``href`` attribute when it has one.
    Request failures and non-200 responses are reported with a message
    instead of a traceback or silent exit.
    """
    url = input("Enter the URL of the website: ").strip()

    try:
        response = requests.get(url, verify=VERIFY_TLS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Covers malformed URLs, DNS/connection failures, and timeouts.
        print(f"Request failed: {exc}")
        return

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        return

    soup = BeautifulSoup(response.content, "html.parser")

    # find_all() with no arguments yields every tag in the document.
    all_tags = {tag.name for tag in soup.find_all()}
    print(f"Available tag names: {format_tag_names(all_tags)}")

    tag_name = input("Enter the HTML tag you want to select: ").strip()
    if not tag_name:
        print("No tag name entered.")
        return

    for tag in soup.find_all(tag_name):
        print(tag.text)
        # Most tags have no href; tag["href"] would raise KeyError for them.
        href_line = format_href_line(tag_name, tag.get("href"))
        if href_line is not None:
            print(href_line)


if __name__ == "__main__":
    main()