005-001. matplotlib, magic command, plot(), plot(kind="bar"), hist(), scatter()
# @
# Most used plots are line plot, bar plot, histogram, scatter diagram
# @
# Magic commands of matplotlib
# %matplotlib
# %matplotlib nbagg: You can manipulate plots interactively
# %matplotlib inline: You can show plot in cell but can't manipulate plots interactively
# @
%matplotlib nbagg
import matplotlib
# pyplot is subpackage of matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Build a Series of 10 cumulative sums of uniform random numbers,
# indexed by 0, 10, ..., 90
series = pd.Series(
    np.cumsum(np.random.rand(10)),
    index=np.arange(0, 100, 10),
)
# Example contents (values differ on every run):
# 0 0.404094
# 10 1.206938
# 20 1.765261
# 30 2.335933
# 40 3.246339
# 50 4.098992
# 60 4.187145
# 70 5.065970
# 80 5.382409
# 90 5.727608
# A Series can draw itself as a line plot ("line" is the default kind)
series.plot(kind="line")
# img 8962dc24-8ebd-4ca8-aec7-e32f79d2182e
# Passing the Series to pyplot directly gives the same result as the code above
plt.plot(series)
# @
# Build a 10x4 DataFrame whose columns are cumulative sums (down the rows)
# of uniform random numbers, indexed by 0, 10, ..., 90
dataframe = pd.DataFrame(
    np.cumsum(np.random.rand(10, 4), axis=0),
    columns=list("ABCD"),
    index=np.arange(0, 100, 10),
)
# Example contents (values differ on every run):
# A B C D
# 0 0.592465 0.207700 0.107388 0.842806
# 10 1.169806 0.886713 0.476341 1.000724
# 20 1.289329 1.677117 1.092054 1.985201
# 30 1.465031 2.128451 1.880766 2.505312
# 40 1.922107 2.570523 2.366606 2.684610
# 50 2.264113 2.616970 2.801705 2.857887
# 60 3.157483 3.131391 2.821849 3.534764
# 70 3.760922 4.119760 3.631236 4.217623
# 80 4.016019 4.458751 4.506558 4.694503
# 90 4.142605 4.819199 4.729676 4.728988
# Plotting the DataFrame draws one line per column
dataframe.plot(kind="line")
# plt.plot(dataframe)
# img d39b8e7c-140b-4ac9-8d78-ba195da3bc2e
# To draw the data of only one column (for example, column "B"),
# first extract that column as a Series,
# then invoke plot() on it
dataframe.loc[:, "B"].plot(kind="line")
# img 9fbb28e8-708d-4f22-96e4-baad9058f0ff
# @
# Bar plots
# They are useful when the x data takes discrete values
series_2 = pd.Series(
    data=np.random.rand(16),
    index=[*"abcdefghijklmnop"],
)
# Example contents (values differ on every run):
# a 0.397676
# b 0.846862
# c 0.480277
# d 0.941101
# e 0.078108
# f 0.528784
# g 0.389172
# h 0.515487
# i 0.484657
# j 0.638249
# k 0.630691
# l 0.832333
# m 0.640034
# n 0.183421
# o 0.573656
# p 0.474688
# Vertical bars; plot.bar() is the accessor form of plot(kind="bar")
series_2.plot.bar()
# img 3bfb0c29-ec34-4033-a6cd-cff24cc4936b
# Horizontal bars; plot.barh() is the accessor form of plot(kind="barh")
series_2.plot.barh()
# @
# A 6x4 DataFrame of uniform random numbers; the column index is named "Genus"
dataframe_2 = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=["one", "two", "three", "four", "five", "six"],
    columns=pd.Index(list("ABCD"), name="Genus"),
)
# Example contents (values differ on every run):
# Genus A B C D
# one 0.066980 0.804447 0.131766 0.789896
# two 0.400801 0.799834 0.177774 0.688652
# three 0.777141 0.294200 0.759455 0.372023
# four 0.824237 0.166228 0.438885 0.113845
# five 0.986942 0.396242 0.156756 0.779464
# six 0.210179 0.495291 0.521406 0.581041
# Vertical bar plot of the whole DataFrame; same as plot(kind="bar")
dataframe_2.plot.bar()
# Horizontal bar plot with the columns stacked into a single bar per row;
# same as plot(kind="barh", stacked=True)
dataframe_2.plot.barh(stacked=True)
# @
# Histograms
# A histogram only looks at the values, so no explicit index is given here
# 200 samples drawn from a normal distribution with mean 0 and standard deviation 1
series_3 = pd.Series(np.random.normal(0, 1, size=200))
# Default number of bins
series_3.hist()
# bins=50 means the value range is divided into 50 bins (intervals);
# it is the number of bins, not the number of samples per bin
series_3.hist(bins=50)
# density=True normalizes the histogram so that its total area equals 1.
# The older spelling normed=True was deprecated in Matplotlib 2.1 and
# removed in Matplotlib 3.1, where it raises an error.
series_3.hist(bins=100, density=True)
# @
# Scatter diagram
# Bar plots and histograms show y as a function of x (x -> y)
# A scatter diagram takes two independent variables x and y
# and shows the relation between them
# Two columns of 100 normally distributed samples each,
# drawn separately from each other
x1 = np.random.normal(loc=1, scale=1, size=(100, 1))
x2 = np.random.normal(loc=-2, scale=4, size=(100, 1))
# Put the two (100, 1) columns side by side to get a (100, 2) array
x = np.hstack((x1, x2))
dataframe_3 = pd.DataFrame(data=x, columns=["x1", "x2"])
# Horizontal axis (width): x1
# Vertical axis (height): x2
plt.scatter(x=dataframe_3["x1"], y=dataframe_3["x2"])
# If x1 and x2 had a positive relation,
# the points would be scattered
# along a diagonal line rising to the right
# In this plot, no relation between x1 and x2 can be seen