#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
caffe_root = 'E:/code/caffe/caffe-windows/Build/x64/Release/pycaffe/'
sys.path.insert(0, caffe_root)
import caffe
from caffe import layers as L, params as P
from caffe.proto import caffe_pb2

# Per-blob multipliers passed as the `param` argument of learnable layers:
# the first entry applies to the weights, the second to the biases.
weight_param = {'lr_mult': 1, 'decay_mult': 1}
bias_param = {'lr_mult': 2, 'decay_mult': 0}
learned_param = [weight_param, bias_param]

# lr_mult=0 for both blobs; note that both list entries are the same dict.
frozen_param = [{'lr_mult': 0}] * 2

def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1,
              weight_filler=dict(type='gaussian', std=0.01),
              bias_filler=dict(type='constant', value=1),
              param=learned_param):
    """Build a Convolution layer followed by an in-place ReLU.

    Args:
        bottom: Top of the layer that feeds the convolution.
        ks: Kernel size passed to the Convolution layer.
        nout: Number of output channels (`num_output`).
        stride: Convolution stride.
        pad: Padding passed to the Convolution layer.
        group: Convolution group count.
        weight_filler: Filler spec used to initialise the weights.
        bias_filler: Filler spec used to initialise the biases.
        param: Per-blob multiplier specs (e.g. `learned_param` or
            `frozen_param`). Declared last so the positions of the
            pre-existing arguments are unchanged.

    Returns:
        A `(conv, relu)` tuple of layer tops.
    """
    # The default filler dicts are shared between calls; they are only
    # forwarded here, never modified.
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group,
                         param=param, weight_filler=weight_filler,
                         bias_filler=bias_filler)
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout, param=learned_param,
            weight_filler=dict(type='gaussian', std=0.01),
            bias_filler=dict(type='constant', value=1)):
    """Build an InnerProduct (fully connected) layer followed by an in-place ReLU.

    Args:
        bottom: Top of the layer that feeds the fully connected layer.
        nout: Number of outputs (`num_output`).
        param: Per-blob multiplier specs (e.g. `learned_param` or
            `frozen_param`).
        weight_filler: Filler spec used to initialise the weights.
        bias_filler: Filler spec used to initialise the biases.

    Returns:
        An `(fc, relu)` tuple of layer tops.
    """
    # The default filler dicts are shared between calls; they are only
    # forwarded here, never modified.
    fc = L.InnerProduct(bottom, num_output=nout, param=param,
                        weight_filler=weight_filler,
                        bias_filler=bias_filler)
    return fc, L.ReLU(fc, in_place=True)

def max_pool(bottom, ks, stride=1):
    """Return a MAX Pooling layer over `bottom`.

    Args:
        bottom: Top of the layer to pool.
        ks: Pooling kernel size.
        stride: Pooling stride.
    """
    pooling_args = dict(pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
    return L.Pooling(bottom, **pooling_args)

def caffenet(datapath, label=None, num_classes=4,
             classifier_name='fc6', learn_all=True):
    """Return the train/test network definition as prototxt text.

    The net is: LMDB data layers (one for TRAIN, one for TEST), four
    conv+ReLU stages with max pooling after the first two, a fully connected
    layer with dropout, and a final classifier with accuracy (TEST only) and
    softmax loss.

    Args:
        datapath: String prefix to which 'img_train_lmdb' / 'img_test_lmdb'
            is appended to form the LMDB sources.
        label: Unused; kept so existing callers keep working.
        num_classes: Number of outputs of the classifier layer.
        classifier_name: Name given to the classifier layer in the net.
        learn_all: If true the conv/fc layers use `learned_param`,
            otherwise `frozen_param`. The classifier always uses
            `learned_param`.

    Returns:
        A string: the serialized TRAIN data layer followed by the serialized
        net that contains the TEST data layer and all remaining layers.
    """
    lmdb_names = ['img_train_lmdb', 'img_test_lmdb']
    net = caffe.NetSpec()

    def lmdb_layer(lmdb_name, phase):
        # NOTE(review): mirror=1 is applied in the TEST phase as well --
        # confirm that this is intended.
        return L.Data(batch_size=256, backend=P.Data.LMDB,
                      source=datapath + lmdb_name,
                      include=dict(phase=phase),
                      transform_param=dict(crop_size=90, mirror=1,
                                           scale=1./255),
                      ntop=2)

    # Serialize a net holding only the TRAIN data layer, then rebind
    # data/label to the TEST data layer so the rest of the net hangs off it.
    net.data, net.label = lmdb_layer(lmdb_names[0], caffe.TRAIN)
    train_proto = str(net.to_proto())
    net.data, net.label = lmdb_layer(lmdb_names[1], caffe.TEST)

    if learn_all:
        layer_param = learned_param
    else:
        layer_param = frozen_param

    net.conv1, net.relu1 = conv_relu(net.data, 11, 32, pad=2, stride=4,
                                     param=layer_param)
    net.pool1 = max_pool(net.relu1, 3, stride=2)

    net.conv2, net.relu2 = conv_relu(net.pool1, 5, 96, pad=2, stride=1,
                                     param=layer_param)
    net.pool2 = max_pool(net.relu2, 3, stride=2)

    # From here on, later layers are wired to the conv/fc tops rather than
    # to the tops of their ReLUs.
    net.conv3, net.relu3 = conv_relu(net.pool2, 5, 128, pad=2,
                                     param=layer_param)
    net.conv4, net.relu4 = conv_relu(net.conv3, 5, 128, pad=2,
                                     param=layer_param)
    net.fc5, net.relu5 = fc_relu(net.conv4, 160, param=layer_param)
    net.drop5 = L.Dropout(net.fc5, dropout_ratio=0.5, in_place=True)

    # The classifier is always learned and is registered under the name
    # given by `classifier_name`.
    classifier = L.InnerProduct(net.drop5, num_output=num_classes,
                                param=learned_param)
    setattr(net, classifier_name, classifier)

    net.accuracy = L.Accuracy(classifier, net.label,
                              include=dict(phase=caffe.TEST))
    net.loss = L.SoftmaxWithLoss(classifier, net.label)
    return train_proto + str(net.to_proto())
def input_data():
    """Return the prototxt text of the net built on the default LMDB location.

    `caffenet` creates the TRAIN and TEST Data layers itself and expects a
    string prefix to which it appends 'img_train_lmdb' / 'img_test_lmdb'.
    Passing it a Data-layer top (as was done before) fails on that string
    concatenation, so the directory prefix is passed instead.

    Returns:
        The string returned by `caffenet`.
    """
    return caffenet('/home/caffe/examples/images/')
def solver(solver_config_path, net_path, base_lr=0.01):
    """Write an SGD solver definition to `solver_config_path`.

    Args:
        solver_config_path: File the solver prototxt is written to.
        net_path: Path of the train/test net prototxt the solver refers to.
        base_lr: Initial learning rate.

    Returns:
        `solver_config_path`, i.e. the file that was written.
    """
    s = caffe_pb2.SolverParameter()

    # Location of the combined train/test network definition.
    s.net = net_path
    s.test_interval = 1000   # Test after every 1000 training iterations.
    s.test_iter.append(100)  # Test on 100 batches each time we test.

    # (Optional) s.iter_size averages the gradient over several iterations,
    # boosting the effective batch size without using more memory. It is
    # left at its default here.
    s.max_iter = 10000  # Number of training iterations (net updates).

    # Solve using stochastic gradient descent (SGD).
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Initial learning rate for SGD.
    s.base_lr = base_lr

    # 'step' policy: multiply the learning rate by `gamma` every `stepsize`
    # iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 2000

    # Momentum averages the current gradient with previous ones to make
    # learning more stable; L2 weight decay regularizes the model.
    s.momentum = 0.9
    s.weight_decay = 5e-3

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshot every 5000 iterations, i.e. twice during the 10000-iteration
    # run configured above.
    s.snapshot = 5000
    s.snapshot_prefix = '/home/caffe/examples/images'

    # Train on the CPU. Use caffe_pb2.SolverParameter.GPU to train on a GPU.
    s.solver_mode = caffe_pb2.SolverParameter.CPU

    # str() of the message gives its text-format serialization.
    with open(solver_config_path, 'w') as f:
        f.write(str(s))
    return solver_config_path
if __name__ == '__main__':
    # Write the solver definition; it refers to the net file written below.
    solver(solver_config_path='smile_solver.prototxt',
           net_path='smile_train_val.prototxt')

    with open('smile_train_val.prototxt', 'w') as f:
        # String values must be quoted in protobuf text format.
        f.write('name: "smile_net"\n')
        # Without the layer definitions the solver would point at an
        # empty net.
        f.write(input_data())

