003. Basics of the "GET request"
@
download-png1.py
@
import urllib.request
import urllib.parse
# Purpose: build a GET request URL for a Naver search, download the page,
# and print it first as raw bytes and then as decoded text.
#
# The full URL this script reconstructs:
# https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%EC%B4%88%EC%BD%9C%EB%A6%BF
api = "https://search.naver.com/search.naver"
values = {
    "where": "nexearch",
    "sm": "top_hty",
    "fbm": "0",
    "ie": "utf8",
    "query": "초콜릿",
}
# "urllib.parse" is imported because the "values" dict has to be
# percent-encoded into a query string (non-ASCII text such as Korean
# cannot be placed in a URL as-is).
params = urllib.parse.urlencode(values)
# Build the full URL: endpoint + "?" + encoded query string
url = api + "?" + params
# Request the URL and read the whole response body.
# The "with" block closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    data = response.read()
print(data)
# The b'...' prefix in the output means the data is a bytes object.
# To get readable text, decode the bytes with decode().
# If decoding a Korean web page as "utf-8" fails,
# try "euc-kr" instead of "utf-8".
text = data.decode("utf-8")
print(text)
# < Now you can see the Korean language in the output
@
# When you scrape a website, you need to know how the server works
# Since server technology is a huge topic,
# let's just inspect the "GET request" part of it
# When you make a request,
# you give a "request method", a "target of request", and "additional information"
@
# https://www.google.com/xxx?cid=33
# request method : GET(POST, PUT, DELETE, PATCH)
# target of request : https://www.google.com
# name of host : www.google.com
# additional information
# 1. path : xxx
# 2. query string specifying data : ?cid=33
@
# https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%EC%B4%88%EC%BD%9C%EB%A6%BF
# request method : GET
# target of request : https://search.naver.com
# name of host : search.naver.com
# additional information
# 1. path : search.naver
# 2. query string specifying 5 pieces of data:
# ? where=nexearch & sm=top_hty & fbm=0 & ie=utf8 & query=%EC%B4%88%EC%BD%9C%EB%A6%BF
@
# %EC%B4%88%EC%BD%9C%EB%A6%BF is a percent-encoded (URL-encoded) string
# When %EC%B4%88%EC%BD%9C%EB%A6%BF is decoded, it becomes 초콜릿