008. Using PhantomJS and Selenium to log in and fetch email data
# @
# docker run -i -t -v /c/users/yourname/sample:/sample -e ko_KR.UTF-8 -e PYTHONIOENCODING=utf_8 ubuntu-phantomjs /bin/bash
# cd /sample/
# @
# test.py
from selenium import webdriver
url = "http://www.xxx.com/"
# Creating a PhantomJS driver instance starts one headless web browser
browser = webdriver.PhantomJS()
try:
    # Let element lookups keep retrying for up to 3 seconds before failing
    # (the original note: PhantomJS is buggy right after start-up,
    # so allow about 3 seconds)
    browser.implicitly_wait(3)
    # Load the page at url in the browser
    browser.get(url)
    # Add whatever you want to do with the page here
    # For example, capture the screen as an image file
    browser.save_screenshot("website.png")
finally:
    # Always terminate the browser, even if a step above raised,
    # so the PhantomJS process is not left running
    browser.quit()
# python3 test.py
# @
# Because this approach launches a real web browser
# and analyzes the page it renders,
# we can collect and analyze everything the browser displays
# The only thing we cannot get past automatically this way
# is content protected by a CAPTCHA
# @
# Log in to the site with PhantomJS
url_login = "http://nid.naver.com/nidlogin.login"
browser_login = webdriver.PhantomJS()
try:
    browser_login.implicitly_wait(3)
    browser_login.get(url_login)
    # browser_login.save_screenshot("website_B.png")
    # Get the text input element whose id attribute is "id"
    # (the lookup must go through the browser_login driver instance)
    element_id = browser_login.find_element_by_id("id")
    # After finding the element, clear the text box before typing
    element_id.clear()
    element_id.send_keys("yourid")
    # Get the text input element whose id attribute is "pw"
    element_pw = browser_login.find_element_by_id("pw")
    element_pw.clear()
    element_pw.send_keys("yourpassword")
    # You can check the filled-in form with a screenshot
    # browser_login.save_screenshot("website_C.png")
    # Select: tag input, class="btn_global", type="submit"
    # Use browser_login here: the first script's browser has already quit
    button_login = browser_login.find_element_by_css_selector("input.btn_global[type=submit]")
    # Submit the login form
    button_login.submit()
finally:
    # Always terminate the browser, even if a lookup above raised
    browser_login.quit()
# @
# Let's try to grab the titles of the mails in the mailbox
# Open the mail page
url_emil = "https://mail.naver.com"
browser_email = webdriver.PhantomJS()
try:
    browser_email.implicitly_wait(3)
    # Open the browser on the mail page
    browser_email.get(url_emil)
    # Fill in the login form; every lookup goes through browser_email
    element_id = browser_email.find_element_by_id("id")
    element_id.clear()
    element_id.send_keys("yourid")
    element_pw = browser_email.find_element_by_id("pw")
    element_pw.clear()
    element_pw.send_keys("yourpassword")
    # Select: tag input, class="btn_global", type="submit"
    button_login = browser_email.find_element_by_css_selector("input.btn_global[type=submit]")
    button_login.submit()
    # Select the mail titles on the mail page
    # Select: tag strong, class="mail_title"
    titles = browser_email.find_elements_by_css_selector("strong.mail_title")
    # Print every mail title, one "- <title>" line per mail
    for title in titles:
        print("-", title.text)
finally:
    # Always terminate the browser, even if a step above raised
    browser_email.quit()