# 011. basic of csv
"""Tutorial notes: basics of CSV data and how to read it in Python.

The first part parses a small in-memory CSV sample with ``str.split``.
The second part (run only when this file is executed as a script) reads
``test.csv`` in euc-kr encoding with the ``codecs`` and ``csv`` modules.
"""

import codecs
import csv

# @
# CSV (comma separated values)
# 1000,soap,300
# 1001,glove,150
# 1002,mask,230

# @
# SSV (space separated values)
# 1000 soap 300
# 1001 glove 150
# 1002 mask 230

# @
# TSV (tab separated values; the fields are separated by a tab character)
# 1000 soap 300
# 1001 glove 150
# 1002 mask 230

# @
# Properties of CSV:
# 1. CSV holds one record (data item) per line
# 2. The attributes of each record are separated by commas
# 3. The top line can be used as a header
# ID,name,price
# 1000,soap,300 : data1
# 1001,glove,150 : data2
# 1002,mask,230 : data3

# @
# Comparison between xml, json, csv
# size: xml > json > csv
# expressiveness: xml > json > csv
# readability: json > xml > csv

# @
# Poisonous mushroom data
# https://archive.ics.uci.edu/ml/datasets/mushroom
# go to data folder
# download agaricus-lepiota.data

# @
# Analyzing csv
# NOTE: the sample text is named csv_text (not csv) so that it does not
# shadow the csv module imported above.
csv_text = """\
p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m\
"""

# If you want to separate a sequence of characters by a particular symbol,
# you can use split()
# First, split the text into lines by "\n"
splitted = csv_text.split("\n")

# You pass the separator as the first argument of split();
# you can omit the argument name when you pass only the first argument,
# i.e. split(",") is the same as split(sep=",")
for item in splitted:
    # Split the current row (not the whole text) into its attributes
    list_mushroom = item.split(",")
    print(list_mushroom)
# output:
# ['p','x','s','n','t','p','f','c','n','k','e','e','s','s','w','w','p','w','o','p','k','s','u']
# ['e','x','s','y','t','a','f','c','b','k','e','c','s','s','w','w','p','w','o','p','n','n','g']
# ['e','b','s','w','t','l','f','c','b','n','e','c','s','s','w','w','p','w','o','p','n','n','m']

# @
# How to deal with "python list"
# After the loop, list_mushroom holds the last row. The outputs listed below
# are the values for the three rows in turn (i.e. what you would get by
# evaluating the expression inside the loop above).

# 1. When you choose one element
list_mushroom[0]
# output:
# p
# e
# e

list_mushroom[1]
# output:
# x
# x
# b

# The last element in the list
list_mushroom[-1]

# The second element from the end
list_mushroom[-2]

# 2. When you choose a range
list_mushroom[1:4]
list_mushroom[5:]
list_mushroom[:5]

# @
# CSV files are generally created by Excel,
# and many CSV files exported from Excel have euc-kr encoding
# If you can export the CSV file with utf-8 encoding, you should do it

# @
# codecs is a module which converts encodings
filename = "test.csv"

# The file-reading demo runs only when this file is executed as a script,
# so importing the file does not require test.csv to exist.
if __name__ == "__main__":
    # We only read the file, so we open it in read-only mode "r"
    # "euc-kr" is the encoding format
    # When you read a file in euc-kr format, you can use the codecs module
    # (the built-in open(filename, encoding="euc-kr", newline="") is the
    # modern equivalent)
    # The with-statement closes the file even if reading fails
    with codecs.open(filename, "r", "euc-kr") as file:
        # 1st arg: iterable object that yields lines
        # 2nd arg: delimiter
        # 3rd arg (optional): quotechar, '"' by default; pass it if needed,
        # for example quotechar="'" for fields written as 'a','b','c'
        reader = csv.reader(file, delimiter=",")
        for cells in reader:
            # Print the second and third columns of every row
            print(cells[1], cells[2])

# @
# In practice the csv module is not used much to read csv files
# We use pandas to read csv files