# 009. xml, collecting weather data
# @
# While using HTML,
# people realized that its tag pattern could also express data structures
# For that purpose,
# a new standard for structured data (XML) was created, modeled on HTML
# @
# XML has an opening tag and a closing tag
# We call "<tag>xxx</tag>" or "<tag/>" an element
# @
# We call xxx the "contents" of the element
# We call xxx "text" if it consists only of characters
# @
#
#
#
# xxx can itself consist of other tags;
# in that case, we call those nested tags the "contents"
# contents
# text
# @
# attribute
# contents
#
# An attribute's value is always a string
# @
# Rule
# There must be exactly one root tag, i.e. the outermost tag at the very top level
#
#
# @
#
#
#
# When you read XML, you may encounter CDATA sections: <![CDATA[xxxxxxxxx]]>
# If you read the text of the wf tag, you get the text xxxxxxxxx inside the CDATA
# CDATA is a special section that protects long text from being parsed as markup
# You don't need to handle CDATA specially when collecting data
# @
# test.py
from bs4 import BeautifulSoup
import urllib.request
# RSS feed that carries the mid-term weather forecast as XML.
url = "http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=108"

# Download the raw XML bytes. The `with` block closes the HTTP response even
# if read() fails, and the timeout keeps the script from hanging forever on
# an unresponsive server.
with urllib.request.urlopen(url, timeout=10) as request:
    xml = request.read()
# print(xml)  # uncomment to inspect the raw feed

# BeautifulSoup's dedicated XML parser ("xml") requires the third-party lxml
# package, so the built-in "html.parser" is used instead; it is good enough
# for this feed.
# NOTE: html.parser lowercases tag names, so always search with lowercase names.
soup = BeautifulSoup(xml, "html.parser")

# The first <location> element is expected to be the Seoul forecast.
# find() returns None instead of raising when the element is missing, so a
# changed or empty feed produces a clear message rather than an IndexError.
seoul = soup.find("location")
if seoul is None:
    raise SystemExit("No <location> element found in the forecast feed")
# print(seoul)  # uncomment to see the forecast entries of this location

# Each <data> element under the location is one forecast entry; print the
# text of its <wf> tag.
datas = seoul.find_all("data")
for item in datas:
    wf = item.find("wf")
    if wf is None:
        # Skip malformed entries instead of crashing on `None.text`.
        continue
    print(wf.text)