import tensorflow as tf

def huber(y_true, y_pred):
  """Element-wise Huber loss with a fixed threshold of 1.

  Args:
    y_true: Target values.
    y_pred: Predicted values (must be subtractable from y_true).

  Returns:
    A tensor holding, per element, error**2 / 2 where |error| < 1 and
    |error| - 0.5 everywhere else.
  """
  residual = y_true - y_pred
  abs_residual = tf.abs(residual)
  quadratic_part = tf.square(residual) / 2
  linear_part = abs_residual - 0.5
  # Small residuals take the quadratic branch, all others the linear one.
  return tf.where(abs_residual < 1, quadratic_part, linear_part)

#모델 컴파일
#model.compile(loss=huber, optimizer="nadam")
#model.fit(...)


def create_huber(threshold=1.0):
  """Build a Huber loss function bound to the given threshold.

  Args:
    threshold: Absolute error below which the loss is quadratic; at or
      above it the loss is linear.

  Returns:
    A function huber(y_true, y_pred) computing the element-wise loss.
  """
  def huber(y_true, y_pred):
    """Element-wise Huber loss using the enclosing threshold."""
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic_part = tf.square(residual) / 2
    linear_part = threshold * abs_residual - threshold**2 / 2
    return tf.where(abs_residual < threshold, quadratic_part, linear_part)
  return huber
#model.compile(loss=create_huber(2.0), optimizer="nadam")

#model = keras.models.load_model("models.h5",
#       custom_objects={"huber": create_huber(2.0)})


#이러한 문제는 keras.losses.Loss 클래스를 상속하여
#get_config() 메소드를 구현해서 해결 할 수 있다.
#아래 코드를 확인하자.


import keras

class HuberLoss(keras.losses.Loss):
  """Huber loss whose threshold is stored on the instance and in its config.

  Args:
    threshold: Absolute error below which the loss is quadratic; at or
      above it the loss is linear.
    **kwargs: Forwarded unchanged to keras.losses.Loss.
  """

  def __init__(self, threshold=1.0, **kwargs):
    self.threshold = threshold
    super().__init__(**kwargs)

  def call(self, y_true , y_pred):
    """Return the element-wise Huber loss between y_true and y_pred."""
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic_part = tf.square(residual) / 2
    linear_part = self.threshold * abs_residual - self.threshold**2 / 2
    return tf.where(abs_residual < self.threshold, quadratic_part, linear_part)

  def get_config(self):
    """Return the base loss config extended with the threshold."""
    return dict(super().get_config(), threshold=self.threshold)


#모델을 컴파일 한다면 다음과 같이 할 수 있을 것이다.

#model.compile(loss=HuberLoss(2.), optimizer="nadam")


#model = keras.models.load_model("models_5",
#         custom_objects={"HuberLoss" : HuberLoss})


def my_softplus(z):
  """Softplus activation, log(exp(z) + 1), computed element-wise.

  Delegates to tf.nn.softplus rather than evaluating log(exp(z) + 1)
  literally: the literal form overflows to inf once exp(z) exceeds the
  floating-point range, whereas the built-in op stays finite.

  Args:
    z: Input tensor.

  Returns:
    A tensor of the same shape as z.
  """
  return tf.nn.softplus(z)

def my_glorot_initial(shape, dtype=tf.float32):
  """Glorot-style normal initializer for a 2-D weight shape.

  Samples from a normal distribution with mean 0 and standard deviation
  sqrt(2 / (shape[0] + shape[1])).

  Args:
    shape: Shape of the tensor to create; the first two entries are used
      to compute the standard deviation.
    dtype: Floating-point dtype of the returned tensor.

  Returns:
    A random tensor of the requested shape and dtype.
  """
  fan_in, fan_out = shape[0], shape[1]
  # Keep stddev a plain Python float: a float32 tensor (as tf.sqrt would
  # produce) cannot be converted by tf.random.normal when dtype is e.g.
  # float16 or float64.
  stddev = (2. / (fan_in + fan_out)) ** 0.5
  return tf.random.normal(shape, stddev=stddev, dtype=dtype)

def my_l1_regular(weights):
  """L1 regularization penalty with a fixed factor of 0.01.

  Args:
    weights: Weight tensor to penalize.

  Returns:
    A scalar tensor: the sum of the absolute values of 0.01 * weights.
  """
  scaled_weights = 0.01 * weights
  magnitudes = tf.abs(scaled_weights)
  return tf.reduce_sum(magnitudes)

def my_positive_weights(weights):
  """Weight constraint that replaces negative entries with zero.

  Args:
    weights: Weight tensor to constrain.

  Returns:
    A tensor like weights in which every entry below 0 is set to 0 and
    all other entries are left unchanged.
  """
  is_negative = weights < 0.
  zeros = tf.zeros_like(weights)
  return tf.where(is_negative, zeros, weights)

#위의 사용자 정의로 만든 함수들은 layer에서 일반적으로 사용하듯이 사용할 수 있다.

#layer = keras.layers.Dense(30, activation=my_softplus,
#           kernel_initializer = my_glorot_initial,
#           kernel_regularizer = my_l1_regular,
#           kernel_constraint = my_positive_weights )


# Demonstrates a streaming metric: the same Precision object is called once
# per batch and accumulates its statistics across the calls.
pre = keras.metrics.Precision()
pre([0,1,1,1],[1,1,1,0])
pre([0,0,0],[1,1,1])

# Each call feeds one batch of labels and predictions into the Precision object.
# Sample weights can also be passed as an argument.
# The second line above processes the first batch and the third line the
# second batch, each call reporting the precision so far.

# The precision is updated step by step as batches go through it.
# A metric of this kind is called a streaming metric.

pre.result()
# result() returns the current value of the metric.

pre.variables
# The variables attribute exposes the variables backing the metric.

pre.reset_states()
# reset_states() resets those variables.
# NOTE(review): newer Keras releases name this method reset_state() --
# confirm against the installed version.


#위와 같은 스트리밍 지표를 만들고 싶다면 , 
#keras.metrics.Metric 클래스를 상속해 만들 수 있다.

# Output (pre.result() after the two batches above):
# <tf.Tensor: shape=(), dtype=float32, numpy=0.33333334>


# Lambda layer that wraps a plain function: it applies tf.exp to its input.
exp_layer = keras.layers.Lambda(lambda x: tf.exp(x))


class MyDense(keras.layers.Layer):
  """Minimal fully connected layer: activation(X @ kernel + bias).

  Args:
    units: Number of output units (size of the last output dimension).
    activation: Activation identifier resolved through
      keras.activations.get (e.g. a name, a callable, or None).
    **kwargs: Forwarded unchanged to keras.layers.Layer.
  """

  def __init__(self, units, activation=None, **kwargs):
    super().__init__(**kwargs)
    self.units = units
    self.activation = keras.activations.get(activation)

  def build(self, batch_input_shape):
    """Create the kernel and bias once the input shape is known."""
    # The kernel maps the last input dimension onto `units` outputs.
    self.kernel = self.add_weight(
        name = "kernel", shape=[batch_input_shape[-1], self.units],
        initializer="glorot_normal"
    )
    self.bias = self.add_weight(
        name = "bias", shape=[self.units], initializer="zeros"
    )
    super().build(batch_input_shape)

  def call(self, X):
    """Apply the affine transform followed by the activation."""
    return self.activation(X @ self.kernel + self.bias)

  def compute_output_shape(self, batch_input_shape):
    """Return the input shape with its last dimension replaced by units."""
    # Normalise through tf.TensorShape first so that plain tuples/lists are
    # accepted as well as TensorShape objects (which alone have as_list()).
    input_dims = tf.TensorShape(batch_input_shape).as_list()
    return tf.TensorShape(input_dims[:-1] + [self.units])

  def get_config(self):
    """Return the base layer config extended with units and activation."""
    base_config = super().get_config()
    return {**base_config, "units" : self.units,
            "activation" : keras.activations.serialize(self.activation)}


#수동 미분

# Point (w1, w2) at which the gradient is approximated below, and the small
# step eps used for the finite differences.
w1 , w2 =  5 , 3
eps = 1e-6

def f(w1, w2):
  """Return 3 * w1**2 + 2 * w1 * w2."""
  quadratic_term = 3 * w1 ** 2
  cross_term = 2 * w1 * w2
  return quadratic_term + cross_term

# f is 3*w1^2 + 2*w1*w2; suppose we want its derivatives.
# Partial derivative with respect to w1: 6*w1 + 2*w2
# Partial derivative with respect to w2: 2*w1

# At (w1, w2) = (5, 3) the gradient vector is (36, 10).

# Approximate each partial derivative with a forward difference of step eps.
print("w1에 대한 경사 값")
print((f(w1+eps,w2) - f(w1,w2)) / eps)
print("w2에 대한 경사 값")
print((f(w1, w2+eps) - f(w1,w2)) / eps)

# The printed values come out close to the analytic ones.
# This is the difference quotient (f(x + a) - f(x)) / ((x + a) - x) with a
# small a, i.e. differentiation via the limit definition.

# Output:
# w1에 대한 경사 값
# 36.000003007075065
# w2에 대한 경사 값
# 10.000000003174137


#자동 미분

# The manual approach above keeps calling f() for every derivative, so it is
# hard to use on a large network.
# TensorFlow implements automatic differentiation, which makes this easy:

w1, w2 = tf.Variable(5.), tf.Variable(3.)
# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as tape:
  z = f(w1 , w2)

# Gradients of z with respect to both variables, in the order requested.
gradients = tape.gradient(z, [w1 , w2])
gradients

# Output:
# [<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
#  <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]


# persistent=True lets gradient() be called more than once on the same tape
# (here once per variable).
with tf.GradientTape(persistent=True) as tape:
  z = f(w1 , w2)

gt1 = tape.gradient(z, w1)
print(gt1)
gt2 = tape.gradient(z, w2)
print(gt2)
# Drop the reference explicitly once the persistent tape is no longer needed.
del tape

# Output:
# tf.Tensor(36.0, shape=(), dtype=float32)
# tf.Tensor(10.0, shape=(), dtype=float32)


# By default the tape only records operations that involve variables.
# The example below uses constants in place of the Variables.

w1, w2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape:
  z = f(w1 , w2)

gradients = tape.gradient(z, [w1 , w2])
gradients

# The result is [None, None].

# Output:
# [None, None]


# To differentiate with respect to tensors that are not variables, ask the
# tape to watch them explicitly:

with tf.GradientTape() as tape:
  tape.watch(w1)
  tape.watch(w2)
  z = f(w1 , w2)

gradients = tape.gradient(z, [w1 , w2])
gradients

# For example, when implementing a regularization loss for an activation
# function that varies a lot for small inputs, the inputs are not variables,
# so it has to be implemented this way.

# Output:
# [<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
#  <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]


# While building a model it is sometimes necessary to block gradient
# backpropagation through part of it.
# tf.stop_gradient() is used for that.
# In the forward pass it behaves like the ordinary expression; in the
# backward pass it lets no gradient through.

w1, w2 = tf.Variable(5.), tf.Variable(3.)
def f(w1, w2):
  """Return 3 * w1**2 + 2 * w1 * w2 with the cross term excluded from gradients."""
  quadratic_term = 3 * w1 ** 2
  # The cross term contributes to the value but is wrapped in stop_gradient.
  frozen_cross_term = tf.stop_gradient(2 * w1 * w2)
  return quadratic_term + frozen_cross_term

with tf.GradientTape() as tape:
  z = f(w1 , w2)

gradients = tape.gradient(z, [w1 , w2])
gradients

# The result is 30 and None: only 3 * w1**2 contributes a gradient for w1
# (6 * 5 = 30), and w2 appears only inside tf.stop_gradient.

# Output:
# [<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

# 사용자 정의 손실 함수

# 사용자 정의 요소 모델 저장 & 로드

# 사용자 정의 커스터마이징

# 사용자 정의 지표

# 사용자 정의 층

# 사용자 정의 모델

# 모델 구성요소로 만드는 손실과 지표

# 자동 미분으로 경사 계산하기