%matplotlib inline
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the iris measurements from the local CSV file into a DataFrame.
df = pd.read_csv("iris.csv")
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

df.head()

# Apply seaborn's "ticks" plot style for the figures that follow.
sns.set(style="ticks", color_codes=True)
# Load the dataset named "iris" through seaborn's dataset loader
# (kept separate from `df`, which was read from the local CSV).
iris = sns.load_dataset("iris")
# Draw seaborn's pair plot for the frame, coloring points by the
# `species` column using the "husl" palette.
g = sns.pairplot(iris, hue="species", palette="husl")

iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

df.describe(include='all')

from pandas.api.types import is_numeric_dtype

# Print mean, standard deviation, minimum and maximum for every numeric column.
for name, values in df.items():
    if not is_numeric_dtype(values):
        # Non-numeric columns (such as string labels) are skipped.
        continue
    print('%s:' % (name))
    print('\t Mean = %.2f' % values.mean())
    print('\t Standard deviation = %.2f' % values.std())
    print('\t Minimum = %.2f' % values.min())
    print('\t Maximum = %.2f' % values.max())

sepal_length:
	 Mean = 5.84
	 Standard deviation = 0.83
	 Minimum = 4.30
	 Maximum = 7.90
sepal_width:
	 Mean = 3.05
	 Standard deviation = 0.43
	 Minimum = 2.00
	 Maximum = 4.40
petal_length:
	 Mean = 3.76
	 Standard deviation = 1.76
	 Minimum = 1.00
	 Maximum = 6.90
petal_width:
	 Mean = 1.20
	 Standard deviation = 0.76
	 Minimum = 0.10
	 Maximum = 2.50

df['species'].value_counts()

virginica     50
setosa        50
versicolor    50
Name: species, dtype: int64

print('Covariance:')
# Compute the covariance matrix on the numeric columns only: `species`
# holds strings, and recent pandas releases raise on non-numeric columns
# instead of silently dropping them. Selecting the numeric columns
# explicitly behaves the same on old and new pandas versions.
df.select_dtypes(include='number').cov()

Covariance:

print('Correlation:')
# Compute the correlation matrix on the numeric columns only: `species`
# holds strings, and recent pandas releases raise on non-numeric columns
# instead of silently dropping them. Selecting the numeric columns
# explicitly behaves the same on old and new pandas versions.
df.select_dtypes(include='number').corr()

Correlation:

데이터 시각화

%matplotlib inline

df['sepal_length'].hist(bins=8)

<matplotlib.axes._subplots.AxesSubplot at 0x23469dc5208>

# Draw one histogram per measurement in a 2x2 grid of subplots.
fig, axes = plt.subplots(2, 2, figsize=(10,6))
bins_number = 8
measurements = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
# `axes.flat` walks the grid row by row, so the panels are filled in the
# same order as the listed measurements.
for index, (panel, measurement) in enumerate(zip(axes.flat, measurements)):
    panel.hist(df[measurement], bins=bins_number)
    # The y-axis label is taken from the frame's column at the same position.
    panel.set_ylabel(df.columns[index])

df.boxplot()

<matplotlib.axes._subplots.AxesSubplot at 0x2346fa16b88>

import matplotlib.pyplot as plt

# Scatter-plot every unordered pair of the first four columns in a 3x2 grid.
fig, axes = plt.subplots(3, 2, figsize=(10,12))
# All (i, j) column-index pairs with i < j among the first four columns: 6 pairs.
column_pairs = [(i, j) for i in range(3) for j in range(i + 1, 4)]
# `axes.flat` walks the grid row by row, one panel per pair.
for panel, (i, j) in zip(axes.flat, column_pairs):
    x_name = df.columns[i]
    y_name = df.columns[j]
    panel.scatter(df[x_name], df[y_name], color='blue')
    panel.set_xlabel(x_name)
    panel.set_ylabel(y_name)

from pandas.plotting import parallel_coordinates
parallel_coordinates(df, 'species')

<matplotlib.axes._subplots.AxesSubplot at 0x2346a19f3c8>

from pandas.plotting import scatter_matrix
scatter_matrix(df, figsize=(8,5))
plt.show()

참조

Jupyter notebook 출력이 블로그 화면 크기에 맞도록 조정하는 코드가 아래에 삽입되어 있습니다.

# Import from the public `IPython.display` module; importing `display`
# from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
# Inject a CSS rule that stretches the notebook container to the full page width.
display(HTML("<style>.container {width:100% !important; }</style>"))

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

	sepal_length	sepal_width	petal_length	petal_width	species
count	150.000000	150.000000	150.000000	150.000000	150
unique	NaN	NaN	NaN	NaN	3
top	NaN	NaN	NaN	NaN	virginica
freq	NaN	NaN	NaN	NaN	50
mean	5.843333	3.054000	3.758667	1.198667	NaN
std	0.828066	0.433594	1.764420	0.763161	NaN
min	4.300000	2.000000	1.000000	0.100000	NaN
25%	5.100000	2.800000	1.600000	0.300000	NaN
50%	5.800000	3.000000	4.350000	1.300000	NaN
75%	6.400000	3.300000	5.100000	1.800000	NaN
max	7.900000	4.400000	6.900000	2.500000	NaN

	sepal_length	sepal_width	petal_length	petal_width
sepal_length	0.685694	-0.039268	1.273682	0.516904
sepal_width	-0.039268	0.188004	-0.321713	-0.117981
petal_length	1.273682	-0.321713	3.113179	1.296387
petal_width	0.516904	-0.117981	1.296387	0.582414

	sepal_length	sepal_width	petal_length	petal_width
sepal_length	1.000000	-0.109369	0.871754	0.817954
sepal_width	-0.109369	1.000000	-0.420516	-0.356544
petal_length	0.871754	-0.420516	1.000000	0.962757
petal_width	0.817954	-0.356544	0.962757	1.000000

랜덤뽑기 (0)	2022.09.01
Class / lambda식 등 (0)	2020.09.07
jupyter notebook을 tistory에 적용하기 (0)	2020.07.30
class -sample (0)	2020.04.29
Turtle - Example (0)	2020.04.27

Kpage

iris.csv 분석

참조¶

'PROGRAM > Python' 카테고리의 다른 글

티스토리툴바

iris.csv 분석

참조¶

'PROGRAM > Python' 카테고리의 다른 글

'PROGRAM/Python' Related Articles

티스토리툴바