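# Prerequisites
# 1. Download and install the latest version of Python 3.
# 2. Create a virtual environment with the following command:
#      python -m venv <folder_name>
# 3. Activate the virtual environment. The path below is the Windows layout
#    (e.g. when run from Git Bash); on Linux/macOS use <folder_name>/bin/activate:
#      source <folder_name>/Scripts/activate
# 4. Install the packages imported by this script and place a chromedriver
#    binary next to it (the script loads it from ./chromedriver):
#      pip install selenium beautifulsoup4 requests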
# Scrape the Shopee Mall search results for "iphone 13": open the search page
# in Chrome, collect the product links, then visit each product page and
# record its URL, name and price.
import time

# NOTE: `requests` is not used by the code visible here; the import is kept
# in case code outside this view relies on it.
import requests
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Print the installed Selenium version.
print(selenium.__version__)

# Configure and launch Chrome through the local chromedriver binary.
options = webdriver.ChromeOptions()
# NOTE(review): the double quotes embedded in this argument may be passed to
# Chrome literally and end up inside the user-agent string -- confirm.
options.add_argument('user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"')
# Selenium 4 deprecated the `executable_path` and `chrome_options` keyword
# arguments and later 4.x releases removed them; the driver path is supplied
# through a Service object and the options through `options=` instead.
browser = webdriver.Chrome(service=Service('./chromedriver'), options=options)

products = []

# try/finally guarantees the browser (and its chromedriver process) is shut
# down even if a page load times out or an expected element is missing.
try:
    # Load the search results page.
    browser.get("https://shopee.tw/mall/search?keyword=iphone%2013")

    # Scroll down in 500px steps (0..9500), pausing half a second after each
    # step -- presumably so lazily loaded results have time to render.
    for y in range(0, 10000, 500):
        browser.execute_script(f"window.scrollTo(0, {y})")
        time.sleep(0.5)
    sourceRaw = browser.page_source

    # Parse the raw HTML string into a navigable document.
    soup = BeautifulSoup(sourceRaw, "html.parser")

    # Visit every product link found in the search results.
    for item in soup.select('.shopee-search-item-result__item a'):
        link = f"https://shopee.tw{item['href']}"
        browser.get(link)

        # Wait up to 8 seconds for the element with class 'attM6y' to become
        # visible; raises TimeoutException otherwise.
        # NOTE(review): 'attM6y' / 'Ybrg9j' look like generated class names
        # for the product title and price -- verify they are still current.
        WebDriverWait(browser, 8).until(
            EC.visibility_of_element_located((By.CLASS_NAME, 'attM6y'))
        )

        # A separate name keeps the search-page `soup` from being shadowed.
        product_soup = BeautifulSoup(browser.page_source, "html.parser")
        product = {
            'url': link,
            'name': product_soup.select('.product-briefing .attM6y span')[0].text,
            'price': product_soup.select('.product-briefing .Ybrg9j')[0].text,
        }
        products.append(product)

    print('all products on the page 1:', products)
finally:
    # Quit the browser.
    browser.quit()