# 006. login by using "requests library"
# @
# test.py
# @
# We scraped web data by using the built-in url library in python
# When you need to login, you should use the "session" technique
# But since dealing with a "session" using only the url library is difficult,
# we'd better use the external module "requests"
# @
# pip install requests
# pip install beautifulsoup4

import requests
from bs4 import BeautifulSoup

url = "http://google.com"

# You can build a dict of request data
# rather than writing the querystring manually
data = {
    "a": "10",
    "b": "20",
}

# You create a session instance
session = requests.session()
# < and you can use get, post, put, delete methods
# < url : url of the site we want to collect data from
# < session.get(url)
# < session.post(url)
# < session.put(url)
# < session.delete(url)
# >

# You get a response instance from 'session.get()'
# For a GET request the dict must be passed as 'params' so that it becomes
# the querystring ('data' would put it into the request body instead)
response = session.get(url, params=data)

# The request itself was already performed by get()
# raise_for_status() only checks the result:
# it raises requests.HTTPError when the server answered with a 4xx/5xx status
response.raise_for_status()

# And then you can utilize the text data stored in the response object
print(response.text)
# < You can see the main page of google
# < Encoding and other things are automatically processed by "requests library"

# When you encounter some problems with get() in the url library,
# you can resolve 99% of them this way
# 1. Go to the web page
# 1. Open the developer tool
# 1. Go to the "network" tab
# 1. Check if you can see the "recording red light"
# 1. Check if you checked "preserve log"
# 1. If you need, configure the filter as 'Doc' on the lower horizontal bar

# Now, we will try to login
# First, try to login directly and see what happens in terms of network
# We entered login.html
# We were moved to login_proc.php
# We were moved to index.html

# We should find the place where we logged in, and it's actually login_proc.php
# And the way to find that place is to check where the ID and password are sent
# When I see login.html, I can't see any record related to ID and password,
# so I can conclude it's not the place I'm looking for
# When I see login_proc.php,
# I can see something related to ID and password in the "Form Data" section,
# so I can conclude that the login happened at this moment on this page
# So, in conclusion, if you imitate this page request by using "requests library",
# you will be able to login in python
# @
# First, you need to see what information is sent
# And it consists of the following elements
# In "Form Data" section,
# return_url : http://www.hanbit.co.kr/index.html
# m_id : myid
# m-password : yourpassword
# In "General" section,
# request url : http://www.hanbit.co.kr/member/login_proc.php
# request methods : post
# @

url_hanbit = "http://www.hanbit.co.kr/member/login_proc.php"

# Placeholder credentials: replace them locally, never commit real ones
# NOTE(review): the keys must match the "Form Data" field names exactly;
# "m-password" (hyphen) next to "m_id" (underscore) looks inconsistent -
# confirm against the developer tool before relying on it
data_hanbit = {
    "return_url": "http://www.hanbit.co.kr/index.html",
    "m_id": "myid",
    "m-password": "yourpassword",
}

# The login cookies are stored in this session,
# so every later request for a logged-in page must go through it
session_hanbit = requests.session()
response_hanbit = session_hanbit.post(url_hanbit, data=data_hanbit)
response_hanbit.raise_for_status()
print(response_hanbit.text)

# @
# Let's grab e-coin data from the logged-in page
url_ecoin = "http://www.hanbit.co.kr/myhanbit/myhanbit.html"

# Use the session that performed the login (not the google one above),
# otherwise the request is sent without the login cookies
response_ecoin = session_hanbit.get(url_ecoin)
response_ecoin.raise_for_status()

soup = BeautifulSoup(response_ecoin.text, "html.parser")

# class mileage_section2, descendant tag span
# select_one() returns None when nothing matches (e.g. the login failed),
# so check it before calling get_text()
tag_ecoin = soup.select_one(".mileage_section2 span")
if tag_ecoin is None:
    raise RuntimeError(
        "e-coin element '.mileage_section2 span' not found; "
        "the login probably failed or the page layout changed"
    )
text_ecoin = tag_ecoin.get_text()
print("your e-coin : ", text_ecoin)