#
# 1,142.70
# Select every <span class="value"> element from the previously parsed page.
results = soup.select('span.value')
# Print the text of each matched element, one per line. Sample output:
# < 1,142.70
# < 332.31
# < 2,182.43
# < ...
for result in results:
    # Use `.string` for the element's text. The former `.sting` spelling is
    # resolved by BeautifulSoup as a lookup of a child <sting> tag, which
    # does not exist and therefore printed None.
    print(result.string)

# The first three values are reported under the dollar, yen and euro labels.
# zip() stops at the shorter sequence, so a page exposing fewer than three
# values prints what is available instead of raising IndexError.
labels = ("dollor-won currency", "yien-won currency", "euro-won currency")
for label, result in zip(labels, results):
    print(label, result.string)
# Now collect news data: download the listing page and parse its HTML.
url = "http://news.naver.com/main/main.nhn?mode=LSD&mid=shm&sid1=105"
# The context manager closes the HTTP response once parsing has consumed it.
with urllib.request.urlopen(url) as response:
    soup = BeautifulSoup(response, "html.parser")

# Classes may repeat within a page but an id should be unique, so combine an
# id selector with a tag selector: every <a> inside the #section_body element.
results = soup.select("#section_body a")
for result in results:
    # NOTE(review): the original key was 'titles', which is not a standard
    # anchor attribute; 'title' is assumed to be the intended one - confirm
    # against the page. .get() prints None for anchors without it rather
    # than aborting the whole crawl with KeyError.
    print('title: ', result.attrs.get('title'))

    url_article = result.attrs.get('href')
    if not url_article:
        # Anchor without a target: nothing was requested, so no delay needed.
        continue

    # Download and parse the linked article page.
    with urllib.request.urlopen(url_article) as response:
        soup_article = BeautifulSoup(response, "html.parser")
    content = soup_article.select_one("#articleBodyContents")

    # Keep only the plain-text children of the article body.
    output = ''
    if content is not None:
        fragments = []
        for item in content.contents:
            # Children can be Tag instances, so convert each one to str
            # before inspecting or collecting it.
            stripped = str(item).strip()
            # Skip whitespace-only children and anything whose text starts
            # with '<' (markup) or '/'.
            if not stripped or stripped.startswith(('<', '/')):
                continue
            # Each kept fragment is added exactly once. The original appended
            # str(item) a second time, which duplicated the text and kept the
            # replace() below from matching.
            fragments.append(stripped)
        output = ''.join(fragments)
        # Drop the placeholder text that is collected along with the body.
        print(output.replace('본문 내용TV플레이어', ''))

    # Pause between article requests so the server is not hit too quickly.
    time.sleep(1)