『写在前面』
以CTC Beam search decoder为例,简单整理一下TensorFlow实现自定义Op的操作流程。
基本的流程
1. 定义Op接口
#include "tensorflow/core/framework/op.h"
// Step 1: register the op's interface with TensorFlow.
// This declares one int32 input ("custom_input") and one int32 output
// ("custom_output"). NOTE(review): no SetShapeFn is provided in this
// minimal example, so the output shape is unknown at graph-build time.
REGISTER_OP("Custom")
.Input("custom_input: int32")
.Output("custom_output: int32");
2. 为Op实现Compute操作(CPU)或实现kernel(GPU)
#include "tensorflow/core/framework/op_kernel.h"
using namespace tensorflow;
class CustomOp : public OpKernel{
public:
explicit CustomOp(OpKernelConstruction* context) : OpKernel(context) {}
void Compute(OpKernelContext* context) override {
// 获取输入 tensor.
const Tensor& input_tensor = context->input(0);
auto input = input_tensor.flat<int32>();
// 创建一个输出 tensor.
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
&output_tensor));
auto output = output_tensor->template flat<int32>();
//进行具体的运算,操作input和output
//……
}
};
3. 将实现的kernel注册到TensorFlow系统中
REGISTER_KERNEL_BUILDER(Name("Custom").Device(DEVICE_CPU), CustomOp);
CTCBeamSearchDecoder自定义
该Op对应TensorFlow中的源码部分
Op接口的定义:
tensorflow-master/tensorflow/core/ops/ctc_ops.cc
CTCBeamSearchDecoder本身的定义:
tensorflow-master/tensorflow/core/util/ctc/ctc_beam_search.cc
Op-Class的封装与Op注册:
tensorflow-master/tensorflow/core/kernels/ctc_decoder_ops.cc
基于源码修改的Op
#include <algorithm>
#include <vector>
#include <cmath>
#include "tensorflow/core/util/ctc/ctc_beam_search.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/kernels/bounds_check.h"
namespace tf = tensorflow;
using tf::shape_inference::DimensionHandle;
using tf::shape_inference::InferenceContext;
using tf::shape_inference::ShapeHandle;
using namespace tensorflow;
// Interface of CTCBeamSearchDecoderWithParam: a clone of the stock
// CTCBeamSearchDecoder op extended with two extra attributes
// (label_selection_size / label_selection_margin) intended to be forwarded
// to the underlying beam-search decoder.
REGISTER_OP("CTCBeamSearchDecoderWithParam")
    .Input("inputs: float")
    .Input("sequence_length: int32")
    .Attr("beam_width: int >= 1")
    .Attr("top_paths: int >= 1")
    .Attr("merge_repeated: bool = true")
    // The two newly added parameters.
    .Attr("label_selection_size: int >= 0 = 0")
    .Attr("label_selection_margin: float")
    .Output("decoded_indices: top_paths * int64")
    .Output("decoded_values: top_paths * int64")
    .Output("decoded_shape: top_paths * int64")
    .Output("log_probability: float")
    .SetShapeFn([](InferenceContext* c) {
      // inputs must be rank 3 and sequence_length rank 1; dim 1 of inputs
      // (the batch dimension) must agree with sequence_length's length.
      ShapeHandle in_shape;
      ShapeHandle seq_len_shape;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &in_shape));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &seq_len_shape));

      DimensionHandle batch_size;
      TF_RETURN_IF_ERROR(c->Merge(c->Dim(in_shape, 1),
                                  c->Dim(seq_len_shape, 0), &batch_size));

      int32 top_paths;
      TF_RETURN_IF_ERROR(c->GetAttr("top_paths", &top_paths));

      // Output layout, in registration order:
      //   [0, top_paths)             decoded_indices — [?, 2] matrices
      //   [top_paths, 2*top_paths)   decoded_values  — [?] vectors
      //   [2*top_paths, 3*top_paths) decoded_shape   — length-2 vectors
      //   3*top_paths                log_probability — [batch, top_paths]
      const ShapeHandle rank2_shape = c->Vector(2);
      for (int i = 0; i < top_paths; ++i) {
        c->set_output(i, c->Matrix(InferenceContext::kUnknownDim, 2));
        c->set_output(top_paths + i, c->Vector(InferenceContext::kUnknownDim));
        c->set_output(2 * top_paths + i, rank2_shape);
      }
      c->set_output(3 * top_paths, c->Matrix(batch_size, top_paths));
      return Status::OK();
    });
typedef Eigen::ThreadPoolDevice CPUDevice;
// Returns the maximum value in row `r` of matrix `m` and stores the column
// index of that maximum into `*c` (the first occurrence wins on ties).
inline float RowMax(const TTypes<float>::UnalignedConstMatrix& m, int r,
                    int* c) {
  CHECK_LT(0, m.dimension(1));  // the row must have at least one column
  int best_col = 0;
  float best_val = m(r, 0);
  for (int col = 1; col < m.dimension(1); ++col) {
    const float v = m(r, col);
    if (v > best_val) {
      best_val = v;
      best_col = col;
    }
  }
  *c = best_col;
  return best_val;
}
class CTCDecodeHelper {
public:
CTCDecodeHelper() : top_paths_(1) {}
inline int GetTopPaths() const { return top_paths_; }
void SetTopPaths(int tp) { top_paths_ = tp; }
Status ValidateInputsGenerateOutputs(
OpKernelContext* ctx, const Tensor** inputs, const Tensor** seq_len,
Tensor** log_prob, OpOutputList* decoded_indices,
OpOutputList* decoded_values, OpOutputList* decoded_shape) const {
Status status = ctx->input("inputs", inputs);
if (!status.ok()) return status;
status = ctx->input("sequence_length", seq_len);
if (!status.ok()) return status;
const TensorShape& inputs_shape = (*inputs)->shape();
if (inputs_shape.dims() != 3) {
return errors::InvalidArgument("inputs is not a 3-Tensor");
}
const int64 max_time = inputs_shape.dim_size(0);
const int64 batch_size = inputs_shape.dim_size(1);
if (max_time == 0) {
return errors::InvalidArgument("max_time is 0");
}
if (!TensorShapeUtils::IsVector((*seq_len)->shape())) {
return ery}йф}}M=A}IEU%IM}=,4(}Q}}4(}}|4(}IР4(Ё}}}С}4(=A}IEU%IM}=,}MM4(}}}4(}4(
Q
!}ьQ
MM}}|Ё}|ZV6"CFcc
Z*j>V(Ё}}4(Ё}}Q}%M11=]}
=Ae}9}MM%8
Q
MA=4)I%MQI}-I91} U%1H9
Q
MAY%
}
AT4(
Q
MA=4(kj=G"@Z4(rn/ZZ}4(}4(ZU%1Zr*4(4)}4(э}}4((4(t4(l}t4(}t4(t4(鍽4(ь4(ɑ}何4(t4(4(G4("n,4(镰}э}}4(镰}n/R"@э}}4(/kj=4(r/kjZW4(4)}}}э}}4)}}}}4(}}}}}(}}э}}}}4(}}}4(}}4(}}}}4(4(mMQ4(}}}t4(}4(4(B;>?э}}}=4({:kZ?bj3ro>rokkR2 |