# vpl.py (2.2 KB)

import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
  5. DRIVER_PATH = "/Users/michaeltang/.wdm/drivers/chromedriver/88.0.4324.96/mac64/chromedriver"
  6. URL = "https://vpl.bibliocommons.com/events/search/index"
  7. # This loads webdriver from the local machine if it exists.
  8. browser = webdriver.Chrome(DRIVER_PATH)
  9. browser.get(URL)
  10. # Give the browser time to load all content.
  11. time.sleep(4)
  12. button = browser.find_element_by_css_selector(".btn-lg")
  13. for i in range(0,20):
  14. button.click()
  15. '''
  16. If you see the following error increase the sleep time:
  17. ElementClickInterceptedError: element click intercepted:
  18. '''
  19. print("Count: ", str(i))
  20. time.sleep(4)
  21. print("done loop")
  22. content = browser.find_elements_by_css_selector(".event-row")
  23. for e in content:
  24. textContent = e.get_attribute('innerHTML')
  25. # Beautiful soup allows us to remove HTML tags from our content if it exists
  26. soup = BeautifulSoup(textContent, features="lxml")
  27. rawString = soup.get_text().strip()
  28. # Remove hidden characters for tabs and new lines.
  29. rawString = re.sub(r"[\n\t]*", "", rawString)
  30. # Replace two or more consecutive empty spaces with '*'
  31. rawString = re.sub('[ ]{2,}', '*', rawString)
  32. #Fine tune the results os they can be parsed.
  33. rawString = rawString.replace("Location", "Location*")
  34. rawString = rawString.replace("Registration closed", "Registration closed*")
  35. rawString = rawString.replace("Registration required", "Registration required*")
  36. rawString = rawString.replace("In Progress", "*In Progress*")
  37. rawString = rawString.replace("*/*", "/")
  38. rawString = rawString.replace("Full*", "*Full*")
  39. #print(rawString)
  40. eventArray = rawString.split('*')
  41. EVENT_NAME = 0
  42. EVENT_DATE = 1
  43. EVENT_TIME = 2
  44. eventName = eventArray[EVENT_NAME]
  45. eventDate = eventArray[EVENT_DATE].strip() # remove leading and trailing spaces
  46. eventTime = eventArray[EVENT_TIME].strip() # remove leading and trailing spaces
  47. location = eventArray[len(eventArray)-1]
  48. print("Name: " + eventName)
  49. print("Date: " + eventDate)
  50. print("Time: " + eventTime)
  51. print("Location: " + location)
  52. print("***")