본문 바로가기
First step/AI 기초반

[TIL] 21.07.06 다중 선형 회귀

by Joshua21 2021. 7. 6.

#예제문제

 

# Example: simple linear regression fitted by gradient descent (TF1 graph mode).
# Each row of `data` is a pair (x, y); per the axis labels below, x is a
# population growth rate (%) and y an elderly growth rate (%).
data=[[0.3,12.27],[-0.78,14.44],[1.26,11.87],[0.03,18.75],[1.11,17.52],[0.24,16.37],[-0.24,19.78],[-0.47,19.51],
      [-0.77,12.65],[-0.37,14.74],[-0.85,10.72],[-0.41,21.94],[-0.27,12.83],[0.02,15.51],[-0.76,17.14],[2.66,14.42]]
# BUG FIX: the extracted source had `[i[0for i in data]` — the subscript's
# closing bracket was lost; restored `i[0]` / `i[1]`.
inc=[i[0] for i in data]  # inputs x
old=[i[1] for i in data]  # targets y

import tensorflow.compat.v1 as tf
tf.compat.v1.disable_v2_behavior()
import matplotlib.pyplot as plt
import numpy as np

# Slope drawn from [0, 10), intercept from [0, 100); seeded for reproducibility.
a=tf.Variable(tf.random_uniform([1],0,10, dtype=tf.float64,seed=0))
b=tf.Variable(tf.random_uniform([1],0,100, dtype=tf.float64,seed=0))

# Hypothesis: predicted y for every sample (the list broadcasts against a).
y=a*inc+b

# Loss: root-mean-square error between prediction and observation.
rmse=tf.sqrt(tf.reduce_mean(tf.square(y-old)))

learning_rate=0.1
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate).minimize(rmse)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for step in range(2001):
    sess.run(gradient_descent)
    if step %100==0:
      print('Epoch: %.f,RMSE=%.04f, 기울기 a=%.4f, y절편 b=%.4f'%(step,sess.run(rmse),sess.run(a),sess.run(b)))
  # Capture the fitted parameters before the session closes.
  data_a=sess.run(a)
  data_b=sess.run(b)

# Dense x grid for drawing the fitted line over the scatter of observations.
line_x=np.arange(min(inc),max(inc),0.01)
line_y=data_a*line_x+data_b

plt.plot(line_x,line_y, c='r', lw=3, ls='-',marker='o',ms=5,mfc='b')
plt.plot(inc,old,'bo')
plt.xlabel('Population Growth Rate(%)')
plt.ylabel('Elderly Growth Rate(%)')
plt.show()

 

# Multiple linear regression with two input features (TF1 graph mode).
import tensorflow.compat.v1 as tf 
tf.disable_v2_behavior()

# Each row: (x1, x2, y). From the variable roles below, x1 and x2 are the
# features and the third column is the target score.
data=[[2,0,81],[4,4,93],[6,2,91],[8,3,97]]
# BUG FIX: restored the subscript brackets lost in extraction
# (`[x_row1[0for ...]` -> `[x_row1[0] for ...]`).
x1=[x_row1[0] for x_row1 in data]
x2=[x_row2[1] for x_row2 in data]
y_data=[y_row[2] for y_row in data]

# Slopes drawn from [0, 10), intercept from [0, 100); seeded for reproducibility.
a1=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))
b=tf.Variable(tf.random_uniform([1],0,100,dtype=tf.float64,seed=0))

a2=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))

# Hypothesis: y = a1*x1 + a2*x2 + b
y=a1*x1 + a2*x2 +b

# Loss: root-mean-square error.
rmse=tf.sqrt(tf.reduce_mean(tf.square(y - y_data)))

learning_rate = 0.1

gradient_descent=tf.train.GradientDescentOptimizer(learning_rate).minimize(rmse)

# Training loop: 2001 steps, log every 100.
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  for step in range(2001):
    sess.run(gradient_descent)
    if step % 100 == 0 :
      print('Epoch: %.f,RMSE=%.04f, 기울기 a1=%.4f,기울기 a2=%.4f, y절편 b=%.4f'%(step,sess.run(rmse),sess.run(a1),sess.run(a2),sess.run(b)))
      # Show the four current predictions alongside the loss.
      z=sess.run(y)
      print('r1=%d...r2=%d...r3=%d...r4=%d'%(z[0],z[1],z[2],z[3]))

 

# My solution: evaluate two hard-coded regression lines against the data
# with numpy (the coefficients look like the converged values from the TF
# fit above — verify against that run's output).
import numpy as np

data=[[2,0,81],[4,4,93],[6,2,91],[8,3,97]]

# Multiple linear regression: y_hat = 1.2301*x1 + 2.1633*x2 + 77.8117
ym = [1.2301 * f1 + 2.1633 * f2 + 77.8117 for f1, f2, _ in data]
print(ym)
ym = np.array(ym)
print('다중 선형회귀의 점수 평균 :', ym.mean())

# Mean absolute error of the multiple-regression predictions.
resultm = [abs((1.2301 * f1 + 2.1633 * f2 + 77.8117) - target) for f1, f2, target in data]
resultm = np.array(resultm)
print('다중 선형회귀의 오차 평균 :', resultm.mean())

# Simple (single-feature) regression for comparison: y_hat = 2.3*x1 + 79
ys = [2.3 * f1 + 79 for f1, _, _ in data]
print(ys)
ys = np.array(ys)
print('단순 선형회귀의 점수 평균 :', ys.mean())

results = [abs((2.3 * f1 + 79) - target) for f1, _, target in data]
results = np.array(results)
print('단순 선형회귀의 오차 평균 :', results.mean())

 

# Same comparison without numpy — plain Python lists and arithmetic.
# Relies on x1, x2 and y_data defined by the earlier multiple-regression cell.
ma1 = 1.2301
ma2 = 2.1633
mb = 77.8117

# Multiple-regression predictions for the four samples.
mresulty = [ma1 * x1[k] + ma2 * x2[k] + mb for k in range(4)]

mavr = sum(mresulty) / 4
print('다중 선형회귀의 점수 평균 :', mavr)

# Mean absolute error against the observed targets.
mdiffy = [abs(y_data[k] - mresulty[k]) for k in range(4)]
avrd1 = sum(mdiffy) / 4
print('다중 선형회귀의 오차 평균 :', avrd1)

# Simple-regression counterpart.
sa1 = 2.3
sb = 79
sresulty = [sa1 * x1[k] + sb for k in range(4)]

savar2 = sum(sresulty) / 4
print('단순 선형회귀의 점수 평균 :', savar2)

sdiffy = [abs(y_data[k] - sresulty[k]) for k in range(4)]
avrd2 = sum(sdiffy) / 4
# NOTE(review): avrd2 is computed but never printed in the original —
# the print for the simple-regression error average appears to be missing.

 

 

# Reference answer: the same comparison, fully vectorized with numpy.
import numpy as np

data=[[2,0,81],[4,4,93],[6,2,91],[8,3,97]]
# BUG FIX: restored the subscript brackets lost in extraction
# (`[x_row1[0for ...]` -> `[x_row1[0] for ...]`).
x1=np.array([x_row1[0] for x_row1 in data], dtype='f')  # feature 1 (float32)
x2=np.array([x_row2[1] for x_row2 in data], dtype='f')  # feature 2 (float32)
y=np.array([y_row[2] for y_row in data], dtype='f')     # targets (float32)

# Multiple linear regression with hard-coded coefficients
# (presumably the converged values of the TF fit above — verify).
m_a1=1.2301
m_a2=2.1633
m_b=77.8117
m_y2=m_a1* x1 +m_a2*x2 +m_b
print('다중 선형회귀의 점수 평균 :',m_y2.mean())
m_diff_y = abs(y-m_y2)  # element-wise absolute error
print('다중 선형회귀의 오차 평균 :',m_diff_y.mean())

# Simple (single-feature) regression for comparison.
s_a1=2.3
s_b=79
s_y1=s_a1*x1 + s_b
print('단순 선형회귀의 점수 평균 :',s_y1.mean())
s_diff_y =abs(y-s_y1)
print('단순 선형회귀의 오차 평균 :',s_diff_y.mean())

 

# Multiple linear regression exercise on the Blood_fat CSV.
# Columns assumed (per the axis labels in the 3-D plot cell below):
# weight, age, blood fat — TODO confirm against the file.
import tensorflow.compat.v1 as tf 
tf.disable_v2_behavior()
import numpy as np

data=np.loadtxt('/content/drive/MyDrive/Colab Notebooks/dataset/Blood_fat.csv',delimiter=',')
# BUG FIX: restored the subscript brackets lost in extraction
# (`[x_row1[0for ...]` -> `[x_row1[0] for ...]`).
x1=[x_row1[0] for x_row1 in data]
x2=[x_row2[1] for x_row2 in data]
y_data=[y_row[2] for y_row in data]

a1=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))
b=tf.Variable(tf.random_uniform([1],0,100,dtype=tf.float64,seed=0))

a2=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))

# Hypothesis: y = a1*x1 + a2*x2 + b
y=a1*x1 + a2*x2 +b

# Loss: root-mean-square error.
rmse=tf.sqrt(tf.reduce_mean(tf.square(y - y_data)))

# Smaller learning rate than the toy example above — presumably because the
# raw feature values here are larger; confirm if retuning.
learning_rate = 0.001

gradient_descent=tf.train.GradientDescentOptimizer(learning_rate).minimize(rmse)

# Training loop: 2001 steps, log every 1000.
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  for step in range(2001):
    sess.run(gradient_descent)
    if step % 1000 == 0 :
      print('Epoch: %.f,RMSE=%.04f, 기울기 a1=%.4f,기울기 a2=%.4f, y절편 b=%.4f'%(step,sess.run(rmse),sess.run(a1),sess.run(a2),sess.run(b)))


# Exercise: refit the Blood_fat regression and visualize it as a 3-D scatter.

import tensorflow.compat.v1 as tf 
tf.disable_v2_behavior()
import numpy as np

data=np.loadtxt('/content/drive/MyDrive/Colab Notebooks/dataset/Blood_fat.csv',delimiter=',')
# BUG FIX: restored the subscript brackets lost in extraction
# (`[x_row1[0for ...]` -> `[x_row1[0] for ...]`).
x1=[x_row1[0] for x_row1 in data]
x2=[x_row2[1] for x_row2 in data]
y_data=[y_row[2] for y_row in data]

a1=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))
b=tf.Variable(tf.random_uniform([1],0,100,dtype=tf.float64,seed=0))

a2=tf.Variable(tf.random_uniform([1],0,10,dtype=tf.float64,seed=0))

# Hypothesis: y = a1*x1 + a2*x2 + b
y=a1*x1 + a2*x2 +b

# Loss: root-mean-square error.
rmse=tf.sqrt(tf.reduce_mean(tf.square(y - y_data)))

learning_rate = 0.001

gradient_descent=tf.train.GradientDescentOptimizer(learning_rate).minimize(rmse)

epoch_step=2001
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  for step in range(epoch_step):
    sess.run(gradient_descent)
    if step % 1000 == 0 :
      print('Epoch: %.f,RMSE=%.04f, 기울기 a1=%.4f,기울기 a2=%.4f, y절편 b=%.4f'%(step,sess.run(rmse),sess.run(a1),sess.run(a2),sess.run(b)))

    if step == epoch_step -1:
      # On the final step, pull the two slopes and the intercept out of the
      # graph into plain numpy values for use after the session closes.
      da1=sess.run(a1)
      da2=sess.run(a2)
      db=sess.run(b)
      print(da1)
      print(da2)
      print(db)
      print(type(da1))

# Recompute predictions outside the session with the extracted parameters.
# NOTE(review): range(25) assumes the CSV has at least 25 rows — confirm.
calc_y=[]
for i in range(25):
  new_y=(da1*x1[i])+(da2*x2[i])+db
  calc_y.append(new_y)
  print(new_y)

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Figure object that hosts the plot.
fig=plt.figure(figsize=(12,12))

# One subplot filling a 1x1 grid, rendered in 3-D.
ax=fig.add_subplot(111,projection='3d')

# Scatter the raw observations.
ax.scatter(x1,x2,y_data)
ax.set_xlabel('Weight')
ax.set_ylabel('Age')
ax.set_zlabel('Blood Fat')  # BUG FIX: label typo 'Bllod Fat'
# BUG FIX: the closing parenthesis was lost in extraction.
ax.view_init(15,15)  # elevation and azimuth of the 3-D view
plt.show()

 

 

# Gradient-descent practice rewritten for TensorFlow 2.x (eager mode).
import tensorflow as tf

data=[[2,81],[4,93],[6,91],[8,97]]

# BUG FIX: restored the subscript brackets lost in extraction
# (`[x_row[0for ...]` -> `[x_row[0] for ...]`).
x_train=[x_row[0] for x_row in data]
y_train=[y_row[1] for y_row in data]

# Slope in [0, 10), intercept in [0, 100); seeded for reproducibility.
w=tf.Variable(tf.random.uniform([1],0,10,dtype=tf.float64,seed=0))
b=tf.Variable(tf.random.uniform([1],0,100,dtype=tf.float64,seed=0))

# Predicted y computed from the current slope and intercept (the hypothesis).
def hypothesis(w,b):
  return x_train*w+b

def costFunc():
  # Zero-argument RMSE loss, in the closure form opt.minimize requires.
  return tf.sqrt(tf.reduce_mean(tf.square(hypothesis(w,b)-y_train)))

def cost(w,b):
  # Same RMSE but with explicit parameters, used for progress logging.
  return tf.sqrt(tf.reduce_mean(tf.square(hypothesis(w,b)-y_train)))

opt=tf.keras.optimizers.SGD(learning_rate=0.1)
for i in range(2001):  # training steps
  opt.minimize(costFunc,var_list=[w,b])
  if i % 100==0:
    print(i,f'{cost(w,b)},{w.numpy()},{b.numpy()}')

 

 

import tensorflow as tf

from datetime import datetime

#### tensorboard 로 도식 보려고 추가한 코드

%load_ext tensorboard

%tensorboard --logdir=logs/mylogs

####tensorboard 로 도식 보려고 추가한 코드

#tensorflow 2.x 버젼으로 경사하강법 실습하기



data=[[2,81],[4,93],[6,91],[8,97]]

 

x_train=[x_row[0for x_row in data]

y_train=[y_row[1for y_row in data]

 

w=tf.Variable(tf.random.uniform([1],0,10,dtype=tf.float64,seed=0))

b=tf.Variable(tf.random.uniform([1],0,100,dtype=tf.float64,seed=0))

 

#기울기와 절편을 통해 계산되는 예상 Y값

def hypothesis(w,b):#가설

  return x_train*w+b

####tensorboard 로 도식 보려고 추가한 코드

@tf.function

###tensorboard 로 도식 보려고 추가한 코드

def costFunc():#minimize에서 사용 손실(비용)함수

  return tf.sqrt(tf.reduce_mean(tf.square(hypothesis(w,b)-y_train)))

def cost(w,b):#손실을 계산하는 함수

  return tf.sqrt(tf.reduce_mean(tf.square(hypothesis(w,b)-y_train)))

 

###tensorboard 로 도식 보려고 추가한 코드

stamp=datetime.now().strftime('%Y%M%d-%H%M%S')

logdir='logs/mylogs/%s'% stamp

writer=tf.summary.create_file_writer(logdir)

tf.summary.trace_on(graph=True,profiler=True)

costFunc()

with writer.as_default():

  tf.summary.trace_export(name='graph_t1',step=0,profiler_outdir=logdir)

  

###tensorboard 로 도식 보려고 추가한 코드

 

opt=tf.keras.optimizers.SGD(learning_rate=0.1)

for i in range(2001):#steps

  opt.minimize(costFunc,var_list=[w,b])

  if i % 100==0:

    print(i,f'{cost(w,b)},{w.numpy()},{b.numpy()}')

 

 

#경사하강법 실습 다중 선형 회귀 tensorflow2.x version

import tensorflow as tf

import numpy as np

%load_ext tensorboard

%tensorboard --logdir=logs/mylogs

 

data=[[2,0,81],[4,4,93],[6,2,91],[8,3,97]]

x1=[x_row1[0for x_row1 in data]

x2=[x_row2[1for x_row2 in data]

y_data=[y_row[2for y_row in data]

 

#기울기의 범위는 0~10사이 t절편은 0~100사이

a1=tf.Variable(tf.random.uniform([1],0,10,dtype=tf.float64,seed=0))

b=tf.Variable(tf.random.uniform([1],0,100,dtype=tf.float64,seed=0))

 

a2=tf.Variable(tf.random.uniform([1],0,10,dtype=tf.float64,seed=0))

 

def Func(a1,a2,b):

  return a1*x1 + a2*x2 +b

@tf.function

def costFunc():#minimize에서 사용 손실(비용)함수

  return tf.sqrt(tf.reduce_mean(tf.square(Func(a1,a2,b)-y_data)))

def cost(a1,a2,b):

  return tf.sqrt(tf.reduce_mean(tf.square(Func(a1,a2,b)-y_data)))

 

stamp=datetime.now().strftime('%Y%M%d-%H%M%S')

logdir='logs/mylogs/%s'% stamp

writer=tf.summary.create_file_writer(logdir)

tf.summary.trace_on(graph=True,profiler=True)

costFunc()

with writer.as_default():

  tf.summary.trace_export(name='graph_t1',step=0,profiler_outdir=logdir)

 

opt=tf.keras.optimizers.SGD(learning_rate=0.1)

for i in range(2001):#steps

  opt.minimize(costFunc, var_list=[a1,a2,b])

  if i % 100==0:

    print(i,f'{cost(a1,a2,b)},{a1.numpy()},{a2.numpy()},{b.numpy()}')