import time import re from selenium import webdriver from bs4 import BeautifulSoup DRIVER_PATH = "/Users/michaeltang/.wdm/drivers/chromedriver/88.0.4324.96/mac64/chromedriver" URL = "https://vpl.bibliocommons.com/events/search/index" # This loads webdriver from the local machine if it exists. browser = webdriver.Chrome(DRIVER_PATH) browser.get(URL) # Give the browser time to load all content. time.sleep(4) button = browser.find_element_by_css_selector(".btn-lg") for i in range(0,20): button.click() ''' If you see the following error increase the sleep time: ElementClickInterceptedError: element click intercepted: ''' print("Count: ", str(i)) time.sleep(4) print("done loop") content = browser.find_elements_by_css_selector(".event-row") for e in content: textContent = e.get_attribute('innerHTML') # Beautiful soup allows us to remove HTML tags from our content if it exists soup = BeautifulSoup(textContent, features="lxml") rawString = soup.get_text().strip() # Remove hidden characters for tabs and new lines. rawString = re.sub(r"[\n\t]*", "", rawString) # Replace two or more consecutive empty spaces with '*' rawString = re.sub('[ ]{2,}', '*', rawString) #Fine tune the results os they can be parsed. rawString = rawString.replace("Location", "Location*") rawString = rawString.replace("Registration closed", "Registration closed*") rawString = rawString.replace("Registration required", "Registration required*") rawString = rawString.replace("In Progress", "*In Progress*") rawString = rawString.replace("*/*", "/") rawString = rawString.replace("Full*", "*Full*") #print(rawString) eventArray = rawString.split('*') EVENT_NAME = 0 EVENT_DATE = 1 EVENT_TIME = 2 eventName = eventArray[EVENT_NAME] eventDate = eventArray[EVENT_DATE].strip() # remove leading and trailing spaces eventTime = eventArray[EVENT_TIME].strip() # remove leading and trailing spaces location = eventArray[len(eventArray)-1] print("Name: " + eventName) print("Date: " + eventDate) print("Time: " + eventTime) print("Location: " + location) print("***")