Set 1
Analyze the eBay market
Is there any money in avocado trees on eBay?
Analyze this link: https://il.ebay.com/b/Avocado-Trees/19617/bn_71618245
Create a list of the names of all items listed on this page. There is no need to scroll to the next pages.
Import the libraries:
import bs4 as bs
import urllib.request
import pandas as pd
Fetch and read the URL:
myurl = "https://ebay.com/b/Avocado-Trees/19617/bn_71618245"
source = urllib.request.urlopen(myurl).read()
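Note: if the plain urlopen call is rejected (sites like eBay sometimes answer automated clients with an HTTP error such as 403), wrapping the URL in a Request with a browser-like User-Agent header may help. A minimal sketch; the header value is an assumption, not something eBay documents:

# Retry with a browser-like User-Agent header (assumed value)
req = urllib.request.Request(myurl, headers={'User-Agent': 'Mozilla/5.0'})
source = urllib.request.urlopen(req).read()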
# Lihi's note: Moshik used lxml but that didn't work for me
soup = bs.BeautifulSoup(source, 'html.parser')
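Regarding the note above: 'lxml' is a faster drop-in parser, but it only works if the lxml package is installed (pip install lxml); otherwise BeautifulSoup raises bs4.FeatureNotFound, which may be why it failed here. A sketch of the alternative call:

# Same parse with the lxml parser; requires the lxml package to be installed
soup = bs.BeautifulSoup(source, 'lxml')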
Select the titles from their HTML tags:
titles = soup.find_all('h3', class_='s-item__title')
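Note that find_all returns bs4 Tag objects rather than plain strings. A quick sanity check (a sketch; it assumes the request succeeded and at least one title matched):

# .get_text() extracts the visible title string from the Tag
print(titles[0].get_text(strip=True))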
Create a list of the prices of all items on this page. Again, focus only on this first page of results.
prices = soup.find_all('span', class_='s-item__price')
Combine both lists into a dataframe and print the dataframe. It should show the item names and prices.
# Extract the text from each Tag; this assumes the two lists align one-to-one
my_df = pd.DataFrame({'title': [t.get_text(strip=True) for t in titles],
                      'price': [p.get_text(strip=True) for p in prices]},
                     columns=['title', 'price'])
my_df
Output: a table of item titles and prices (not reproduced here).
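To actually answer the money question, the price strings need to be numeric. A rough sketch, assuming eBay formats prices like "$24.99" (price ranges such as "$10.00 to $30.00", if present, are reduced to their first number):

import re

def to_number(price_text):
    # Pull the first number out of a string like "$24.99"; None if nothing matches
    match = re.search(r'[\d,]+\.?\d*', price_text)
    return float(match.group().replace(',', '')) if match else None

my_df['price_usd'] = my_df['price'].apply(to_number)
my_df['price_usd'].describe()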
Set 2:
Scrape from multiple URLs
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URLs to scrape
urls = ['https://www.timesunion.com/', 'https://www.cleveland.com/', 'https://newsadvance.com/']

# Collect the scraped records here
data = []

# Loop through the URLs
for url in urls:
    # Send a GET request to the website
    response = requests.get(url)

    # Parse the HTML content using Beautiful Soup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the data; find() returns None when an element is missing,
    # so guard each lookup before chaining further calls on it
    if 'timesunion.com' in url:
        # Scrape for Senator Liz Krueger
        krueger = soup.find('a', string='Krueger')
        if krueger is not None:
            krueger_policy = krueger.find_next('div', class_='card-body').get_text().strip()
            data.append({'website': url, 'name': 'Senator Liz Krueger', 'policy': krueger_policy})
    elif 'cleveland.com' in url:
        # Scrape for Representative Jason Stephens
        stephens_link = soup.find('a', string='Jason Stephens')
        if stephens_link is not None:
            stephens = stephens_link.find_parent('div', class_='col-sm-12')
            stephens_policy = stephens.find('div', class_='politician-card__description').get_text().strip()
            data.append({'website': url, 'name': 'Representative Jason Stephens', 'policy': stephens_policy})
    elif 'newsadvance.com' in url:
        # Scrape for Delegate Kathy J. Byron
        byron = soup.find('a', string='Kathy J. Byron')
        if byron is not None:
            byron_policy = byron.find_next('div', class_='card-body').get_text().strip()
            data.append({'website': url, 'name': 'Delegate Kathy J. Byron', 'policy': byron_policy})

# Create a Pandas DataFrame from the data
df = pd.DataFrame(data)

# Write the data to a CSV file
df.to_csv('politician_environmental_policies.csv', index=False)
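Optional sanity check (a sketch): read the CSV back and confirm there is one row per site where the selector matched.

check = pd.read_csv('politician_environmental_policies.csv')
print(check)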