002-RZqRkoSaoYA. automatic spell checker
@
Type "Iplaysoccer" into the spell checker.
The result is fetched via an Ajax call.
Open the Network tab in Chrome Developer Tools.
There we can see a request to "spellchecker.nhn?callback=window, ....".
@
# The requests library makes HTTP requests easier than urllib
import requests
# The scheme must be written "https://" (with the colon) and the query string
# must follow "?" directly; "& ...." stands for the remaining parameters,
# which these notes leave out.
url = "https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?callback=window.__jindo2_callback._spellingCheck_0& ...."
requests.get(url)
# < <Response [200]>  (assumed: the original note left this output blank)
requests.get(url).text
# < 'window.__jindo2_callback._spellingCheck_0({"message":{"@type":"response" ... I play soccer}})'
@
Since the response looks like 'window.__jindo2_callback._spellingCheck_0({"message":{"@type":"response" ... I play soccer}})', we can tell that this site uses JSONP.
In "https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?callback=window.__jindo2_callback._spellingCheck_0& ....", the request URL passes the name of the function to be invoked as the callback.
What we need to do is extract the '{"message":{"@type":"response" ... I play soccer}}' part.
@
Let's try changing the callback method:
# Same endpoint, but with our own callback name in the query string.
# The scheme is written "https://" and the query follows "?" with no space;
# "& ...." stands for the remaining parameters, which these notes leave out.
url = "https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?callback=window.mycustomcallback& ...."
requests.get(url).text
# < 'window.mycustomcallback({"message":{"@type":"response" ... I play soccer}})'
@
import json
# Fetch the JSONP text once and keep it for the steps below
response = requests.get(url).text
# Drop the callback wrapper in two steps so that only the JSON text remains
without_prefix = response.replace('window.mycustomcallback(', '')
payload = without_prefix.replace(');', '')
result = json.loads(payload)
type(result)
# < dict
result.keys()
# < dict_keys(['message'])
result['message']['result']['html']
# < "I play soccer"
@
Create a new file, spellchecker.py:
import requests

# Endpoint only: requests builds the query string from `params`.
# The scheme must be "https://" (with the colon); without it requests
# cannot recognise the URL and the call fails before anything is sent.
url = "https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn"
params = {
    # Name the server wraps around the JSON payload (JSONP callback)
    'callback': 'window.mycustomcallback',
    # Text to be spell-checked
    'q': 'Iplaysoccer.',
}
response = requests.get(url, params=params).text
print(response)
python spellchecker.py
# < 'window.mycustomcallback({"message":{"@type":"response" ... I play soccer}})
# The params dict stores the callback name under 'callback' (there is no
# '_callback' key, so looking that up raises KeyError).
jsonp_prefix = params['callback'] + '('
_, found, tail = response.partition(jsonp_prefix)
if found:
    # Keep what lies between the prefix and the last ')'. Unlike
    # replace(');', ''), this leaves any ");" inside the payload untouched.
    response = tail.rpartition(')')[0]
print(response)
# < {"message":{"@type":"response" ... I play soccer}}
import json
# Parse the JSON text in "response" into a dictionary
response_dict = json.loads(response)
print(response_dict)
# < {"message":{"@type":"response", "result": {"errata_count":1, "html":"I play soccer"}}}
print(response_dict["message"]["result"]["html"])
# < I play soccer
result_text = response_dict["message"]["result"]["html"]
import re
# Remove anything that looks like a markup tag (<...>) from the result
result_text = re.sub(r'<\/?.*?>', '', result_text)
print(result_text)
python spellchecker.py
# < I play soccer
@
Now wrap the logic above in a function:
def spellchecker(q):
    """Return the spell-checked form of ``q`` from the Naver spell checker.

    Sends ``q`` to the JSONP endpoint, removes the callback wrapper around
    the JSON payload, and returns the ``message.result.html`` field with
    anything that looks like a markup tag (``<...>``) removed.

    Args:
        q: Text to check, e.g. ``'Iplaysoccer'``.

    Returns:
        The corrected text as a string.

    Raises:
        requests.RequestException: If the HTTP request fails.
        json.JSONDecodeError: If the body is not the expected JSONP/JSON.
        KeyError: If the payload lacks ``message``/``result``/``html``.
    """
    # The scheme needs the colon ("https://"); "https//" is not a valid URL.
    url = "https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn"
    params = {
        'callback': 'window.mycustomcallback',
        'q': q,
    }
    response = requests.get(url, params=params).text

    # The body is JSONP: <callback>(<json>);
    # The callback name lives under 'callback' (there is no '_callback' key).
    jsonp_prefix = params['callback'] + '('
    _, found, tail = response.partition(jsonp_prefix)
    if found:
        # Keep what lies between the prefix and the last ')'. Unlike
        # replace(');', ''), this cannot delete a ");" inside the payload.
        response = tail.rpartition(')')[0]

    response_dict = json.loads(response)
    result_text = response_dict["message"]["result"]["html"]
    # Strip tag-like sequences so only the plain text is returned.
    return re.sub(r'<\/?.*?>', '', result_text)
if __name__ == '__main__':
    # Read one line from standard input (no prompt) and print the
    # spell-checked result.
    print(spellchecker(input('')))