# 008. Using PhantomJS and Selenium: log in and fetch email data

# @
# docker run -i -t -v /c/users/yourname/sample:/sample -e ko_KR.UTF-8 -e PYTHONIOENCODING=utf_8 ubuntu-phantomjs /bin/bash
# cd /sample/

# @
# test.py
from selenium import webdriver

# NOTE(review): webdriver.PhantomJS and the find_element(s)_by_* helpers used
# below belong to older Selenium releases -- confirm the installed version
# still provides them before running this script.

url = "http://www.xxx.com/"

# Create a PhantomJS instance,
# which means you have created one web browser.
browser = webdriver.PhantomJS()

# There is a bug in PhantomJS,
# so you should allow about 3 seconds when PhantomJS first starts.
# (implicitly_wait sets how long element lookups may wait, in seconds.)
browser.implicitly_wait(3)

# Open the url in the browser.
browser.get(url)

# You can add whatever code you want here.
# For example, you can capture the screen as an image file.
browser.save_screenshot("website.png")

# Terminate the browser.
browser.quit()

# python3 test.py

# @
# Because this approach drives a real web browser
# and analyzes the rendered web page,
# we can collect and analyze practically everything.
# The only thing we cannot get past automatically this way
# is content protected by a captcha.

# @
# Log in with PhantomJS.
url_login = "http://nid.naver.com/nidlogin.login"
browser_login = webdriver.PhantomJS()
browser_login.implicitly_wait(3)
browser_login.get(url_login)
# browser_login.save_screenshot("website_B.png")

# Get the id text input box element.
# The lookup must go through the browser that loaded the login page.
element_id = browser_login.find_element_by_id("id")
# After finding the element, clear the text box before typing.
element_id.clear()
element_id.send_keys("yourid")

# Get the pw text input box element.
element_pw = browser_login.find_element_by_id("pw")
element_pw.clear()
element_pw.send_keys("yourpassword")

# You can check the result with a screenshot.
# browser_login.save_screenshot("website_C.png")

# tag input and class="btn_global" and type="submit"
# Use browser_login here: the first `browser` was already closed by quit().
button_login = browser_login.find_element_by_css_selector("input.btn_global[type=submit]")

# Press the button.
button_login.submit()

browser_login.quit()

# @
# Let's try to grab the contents of the email inbox.

# Open the mail page.
url_emil = "https://mail.naver.com"
browser_email = webdriver.PhantomJS()
browser_email.implicitly_wait(3)

# Open the browser with the specific web page.
browser_email.get(url_emil)

# This is a new browser instance, so the login form is filled in again here.
element_id = browser_email.find_element_by_id("id")
element_id.clear()
element_id.send_keys("yourid")

element_pw = browser_email.find_element_by_id("pw")
element_pw.clear()
element_pw.send_keys("yourpassword")

button_login = browser_email.find_element_by_css_selector("input.btn_global[type=submit]")
button_login.submit()

# Select the mails in the mail page.
# tag strong and class="mail_title"
titles = browser_email.find_elements_by_css_selector("strong.mail_title")

# Use a for loop to display the titles of all mails.
for title in titles:
    print("-", title.text)
# < mail titles

browser_email.quit()