My first HTML document

# 003. basic of "GET request"
# download-png1.py

import urllib.parse
import urllib.request

# Target URL, for reference:
# https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%EC%B4%88%EC%BD%9C%EB%A6%BF
api = "https://search.naver.com/search.naver"
values = {
    "where": "nexearch",
    "sm": "top_hty",
    "fbm": "0",
    "ie": "utf8",
    "query": "초콜릿",
}

# "urllib.parse" is imported because the "values" dict has to be
# percent-encoded before it can be used as a query string.
params = urllib.parse.urlencode(values)

# Build the full URL: endpoint + "?" + encoded query string.
url = api + "?" + params


def main():
    """Fetch the page at ``url`` and print it, first as bytes, then as text."""
    # Open the URL and read the whole response body. The "with" block
    # closes the response once the body has been read.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    # The b'...' prefix in this output means the data is in binary format.
    print(data)

    # Convert the binary data to text with decode(), using "utf-8".
    # If utf-8 gives you trouble when decoding a Korean web page,
    # try "euc-kr" instead of "utf-8".
    text = data.decode("utf-8")

    # Fixed: this call was misspelled "pritn", which raised NameError.
    # Now you can see the Korean language in the output.
    print(text)


if __name__ == "__main__":
    main()


# When you scrape a web site, you need to know how a server is composed.
# Since server technology is a huge topic,
# let's just inspect the "GET request" part of it.
# When you send a request, you give:
# "request method", "target of request", "additional information"
#
# https://www.google.com/xxx?cid=33
# request method : GET (POST, PUT, DELETE, PATCH)
# target of request : https://www.google.com
# name of host : www.google.com
# additional information
# 1. path : xxx
# 2. querystring appointing data: ?cid=33
#
# https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%EC%B4%88%EC%BD%9C%EB%A6%BF
# request method : GET
# target of request : https://search.naver.com
# name of host : search.naver.com
# additional information
# 1. path : search.naver
# 2. appointing 5 data (querystring):
#    ?
#    where=nexearch
#    & sm=top_hty
#    & fbm=0
#    & ie=utf8
#    & query=%EC%B4%88%EC%BD%9C%EB%A6%BF
#
# %EC%B4%88%EC%BD%9C%EB%A6%BF is a percent-encoded string.
# When %EC%B4%88%EC%BD%9C%EB%A6%BF is decoded, it becomes 초콜릿