# Linear regression example 1: Boston housing price prediction.
# No activation on the output layer and MSE loss: a plain regression setup
# (no true/false classification involved).
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf

# Fix random seeds for reproducibility.
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)

# Whitespace-delimited file with no header row: 13 feature columns + 1 target.
prehouse = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dataset/housing.csv',
                       delim_whitespace=True, header=None)
# frac=1 keeps every row but returns them in random order (shuffle).
house = prehouse.sample(frac=1)

# Split into features (columns 0-12) and target house price (column 13).
dataset = house.values
X = dataset[:, 0:13]
Y = dataset[:, 13]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=seed)

# Regression model: the output layer has no activation (raw continuous output).
model = Sequential()
model.add(Dense(30, input_dim=13, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, Y_train, epochs=200, batch_size=10)

# Compare the first 10 predictions with the actual prices.
# (Loop body re-indented: the scraped original had it at top level.)
y_prediction = model.predict(X_test).flatten()
for i in range(10):
    label = Y_test[i]
    prediction = y_prediction[i]
    print('실제 가격: {:.3f}, 예상가격:{:.3f}'.format(label, prediction))
이항/다항 분류 학습에서는 accuracy, val_accuracy, loss, val_loss를 모두 확인 가능
선형회귀 학습에서는 loss와 val_loss만 확인 가능
# Linear regression example 2: Boston housing price prediction with a
# validation split, early stopping, and best-model checkpointing.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf

# Fix random seeds for reproducibility.
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)

# Whitespace-delimited file with no header row: 13 feature columns + 1 target.
prehouse = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dataset/housing.csv',
                       delim_whitespace=True, header=None)
house = prehouse.sample(frac=1)  # shuffle all rows

# Split into features (columns 0-12) and target house price (column 13).
dataset = house.values
X = dataset[:, 0:13]
Y = dataset[:, 13]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=seed)

# Regression model: the output layer has no activation (raw continuous output).
model = Sequential()
model.add(Dense(30, input_dim=13, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os

# Stop training once val_loss has not improved for 50 consecutive epochs.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=50)

# Save the best model so far after each epoch that improves val_loss.
# ModelCheckpoint fails if the target directory does not exist, so create it.
os.makedirs('./model', exist_ok=True)
modelpath = './model/{epoch:02d}-{val_loss:.4f}.hdf5'
checkpointer = ModelCheckpoint(filepath=modelpath, monitor='val_loss',
                               verbose=1, save_best_only=True)

# BUG FIX: batch_size was 0 (invalid in Keras); use 10 as in the other runs.
history = model.fit(X_train, Y_train, validation_split=0.33, epochs=500,
                    batch_size=10,
                    callbacks=[early_stopping_callback, checkpointer])

# Keep the History object intact; view its per-epoch metrics via a DataFrame.
# (The original overwrote `history` with the DataFrame and then wrote
# `history.history=['val_loss']` — an assignment instead of an index lookup.)
hist = pd.DataFrame(history.history)
print(hist.tail())
y_vloss = history.history['val_loss']
y_loss = history.history['loss']

import matplotlib.pyplot as plt
# Plot training vs. validation loss (labels fixed: they were swapped).
x_len = np.arange(len(y_loss))
plt.figure(figsize=(10, 5))
plt.plot(x_len, y_loss, 'o', c='violet', markersize=3, label='loss')
plt.plot(x_len, y_vloss, 'o', c='springgreen', markersize=3, label='val_loss')
plt.legend()
plt.show()

# Compare the first 10 predictions with the actual prices.
Y_prediction = model.predict(X_test).flatten()
for i in range(10):
    label = Y_test[i]
    prediction = Y_prediction[i]
    print('실제 가격: {:.3f}, 예상가격:{:.3f}'.format(label, prediction))
정규화
머신러닝 알고리즘은 데이터가 가진 특성들을 비교하여 데이터의 패턴을 찾음. 데이터가 가진 특성의 스케일 차이가 심하게 나는 경우 문제가 발생
예를 들어 주택에 관한 정보가 담긴 데이터에서 특성으로 방의 개수(10개 미만의 차이), 얼마나 오래전에 지어졌는지(길게는 몇백 년) 같은 것들이 포함될 수 있음
정규화는 모든 특성들이 비슷한 영향력을 행사하도록 값을 변환해주는 기술
단위가 다르면 직접적인 비교가 불가능, 동일한 단위를 사용하더라도 값의 범위가 크게 차이나는 상황에서는 비교가 어려움
(10점 만점의 9점, 100점 만점의 9점)
min-max 정규화
가장 일반적인 방법으로, 모든 특성의 최소값을 0, 최대값을 1로 변환
outlier에 대한 고민이 필요
통계적인 기법을 적용하면 개선되기는 하나 영향을 완전히 제거할 수는 없음
따라서 모든 스케일러 처리 전에는 outlier 제거가 선행되어야 함
z-score 정규화
outlier문제를 피하는 데이터 정규화 전략
x라는 값을 z 점수로 바꿔줌
(x-평균)/표준편차
# Linear regression example 3: Boston housing price prediction with
# z-score normalization of the features, plus early stopping/checkpointing.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os

# Fix random seeds for reproducibility.
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)

# Whitespace-delimited file with no header row: 13 feature columns + 1 target.
prehouse = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dataset/housing.csv',
                       delim_whitespace=True, header=None)
house = prehouse.sample(frac=1)  # shuffle all rows

# Split into features (columns 0-12) and target house price (column 13).
dataset = house.values
X = dataset[:, 0:13]
Y = dataset[:, 13]

# Box plot of the raw data to inspect feature scales before normalization.
house.plot.box(figsize=(10, 10))

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=seed)
plt.boxplot(X_train)

# z-score normalization: (x - mean) / std, using TRAINING-set statistics for
# both train and test so no test information leaks into the preprocessing.
mean = X_train.mean(axis=0)
X_train -= mean
std = X_train.std(axis=0)
X_train /= std
X_test -= mean
X_test /= std

# Box plot again to confirm the features are now on comparable scales.
plt.boxplot(X_train)

# Regression model: the output layer has no activation (raw continuous output).
model = Sequential()
model.add(Dense(30, input_dim=13, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop training once val_loss has not improved for 50 consecutive epochs.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=50)

# Save the best model so far; ModelCheckpoint fails if the directory is missing.
os.makedirs('./model', exist_ok=True)
modelpath = './model/{epoch:02d}-{val_loss:.4f}.hdf5'
checkpointer = ModelCheckpoint(filepath=modelpath, monitor='val_loss',
                               verbose=1, save_best_only=True)

history = model.fit(X_train, Y_train, validation_split=0.33, epochs=500,
                    batch_size=10,
                    callbacks=[early_stopping_callback, checkpointer])
hist = pd.DataFrame(history.history)
print(hist.tail())

# BUG FIX: the original wrote `y_vloss=history.history=['val_loss']`, which
# ASSIGNS a list to history.history instead of indexing into it.
y_vloss = history.history['val_loss']
y_loss = history.history['loss']

# Plot training vs. validation loss (labels fixed: they were swapped).
x_len = np.arange(len(y_loss))
plt.figure(figsize=(10, 5))
plt.plot(x_len, y_loss, 'o', c='violet', markersize=3, label='loss')
plt.plot(x_len, y_vloss, 'o', c='springgreen', markersize=3, label='val_loss')
plt.legend()
plt.show()

# Compare the first 10 predictions with the actual prices.
Y_prediction = model.predict(X_test).flatten()
for i in range(10):
    label = Y_test[i]
    prediction = Y_prediction[i]
    print('실제 가격: {:.3f}, 예상가격:{:.3f}'.format(label, prediction))

# Plot all predictions against the actual prices and report the mean
# absolute error over the test set.
plt.figure(figsize=(20, 10))
plt.plot(Y_prediction, c='red')
plt.plot(Y_test, c='blue')
avr = abs(Y_prediction - Y_test).mean()
print(avr)
# Linear regression exercise: predicting daily stock closing prices from
# open/high/low/volume features.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

path = '/content/drive/MyDrive/Colab Notebooks/dataset/'
# header=1: the column names are on the second line of the file.
data = pd.read_csv(path + 'data-02-stock_daily.csv', header=1)

# Overview plots: price series, trading volume, and a zoomed-in first week.
fig = plt.figure(figsize=(30, 20))
ax1 = fig.add_subplot(3, 1, 1)
ax2 = fig.add_subplot(3, 1, 2)
ax3 = fig.add_subplot(3, 1, 3)
ax1.plot(data['Open'])
ax1.plot(data['High'])
ax1.plot(data['Low'])
ax1.plot(data['Close'])
ax2.plot(data['Volume'])
ax3.plot(data['Open'][0:7], linewidth=3.0, label='open')
ax3.plot(data['High'][0:7], linewidth=3.0, label='high')
ax3.plot(data['Low'][0:7], linewidth=3.0, label='low')
ax3.plot(data['Close'][0:7], linewidth=3.0, label='close')
ax3.legend(prop={'size': 30})

# Features: open/high/low/volume; target: closing price.
xdata = data[['Open', 'High', 'Low', 'Volume']]
ydata = pd.DataFrame(data['Close'])
xdata.plot.box(figsize=(5, 5))
plt.show()

# z-score normalization of both features and target.
xdata_ss = StandardScaler().fit_transform(xdata)
ydata_ss = StandardScaler().fit_transform(ydata)
print(xdata_ss.shape, ydata_ss.shape)
plt.boxplot(xdata_ss)
plt.show()

# Train/test split by position.
# BUG FIX: the original used the slice [220:, :] for BOTH train and test,
# so the model was trained and evaluated on exactly the same rows.
xtrain = xdata_ss[:220, :]
xtest = xdata_ss[220:, :]
ytrain = ydata_ss[:220, :]
ytest = ydata_ss[220:, :]
print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

# Deep regression model; single linear output unit for the scaled close price.
model = Sequential()
model.add(Dense(units=1024, input_dim=4, activation='relu'))
model.add(Dense(units=512, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Early stopping monitors the TRAINING mae (no validation split is used here).
es = EarlyStopping(monitor='mae', patience=10)

# Fix random seeds before fitting, as in the original.
seed = 123
np.random.seed(seed)
tf.random.set_seed(seed)

hist = model.fit(xtrain, ytrain, epochs=100, batch_size=16, callbacks=[es])
print('loss' + str(hist.history['loss']))
print('MAE' + str(hist.history['mae']))

# Evaluate on the held-out rows.
res = model.evaluate(xtest, ytest, batch_size=32)
print('loss', res[0], 'mae', res[1])

# Predicted vs. actual scaled closing prices on the test set.
xhat = xtest
yhat = model.predict(xhat)
plt.figure()
plt.plot(yhat, label='predicted')
plt.plot(ytest, label='actual')
plt.legend(prop={'size': 20})
# Typo fixed in the output label: 'Evalueate' -> 'Evaluate' (test-set MSE).
print('Evaluate : {}'.format(np.average((yhat - ytest) ** 2)))
'First step > AI 기초반' 카테고리의 다른 글
[TIL] 21.07.19pandas로 웹크롤링 (0) | 2021.07.19 |
---|---|
[TIL]21.07.19 비지도학습 (0) | 2021.07.19 |
[TIL]21.07.15과적합 (0) | 2021.07.15 |
[TIL]21.07.14keras로 실습하기 (0) | 2021.07.14 |
[TIL]21.07.13 웹크롤링 (0) | 2021.07.13 |