profile
viewpoint
Faijul Amin mdfaijul Intel Corporation Chandler, AZ, USA

mdfaijul/addons 0

Useful extra functionality for TensorFlow 2.x maintained by SIG-addons

mdfaijul/models 0

Models and examples built with TensorFlow

mdfaijul/nmt 0

TensorFlow Neural Machine Translation Tutorial

mdfaijul/tensorflow 0

An Open Source Machine Learning Framework for Everyone

delete branch Intel-tensorflow/tensorflow

delete branch : amin/inplace-fix

delete time in 17 days

delete branch Intel-tensorflow/tensorflow

delete branch : amin/dnnl-batchmatmul

delete time in 17 days

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast and update oneDNN to v1.6.4.

 class BatchMatMulMkl : public OpKernel {       return;     } -    auto rhs_reshaped = rhs.template flat_inner_dims<Scalar, 3>();-    auto lhs_reshaped = lhs.template flat_inner_dims<Scalar, 3>();-    auto out_reshaped = out->template flat_inner_dims<Scalar, 3>();-    const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1);-    const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2);-    const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2);--    std::vector<MKL_INT> m_array(batch_size, M);-    std::vector<MKL_INT> n_array(batch_size, N);-    std::vector<MKL_INT> k_array(batch_size, K);-    std::vector<MKL_INT> lda_array(batch_size, adj_x_ ? M : K);-    std::vector<MKL_INT> ldb_array(batch_size, adj_y_ ? K : N);-    std::vector<MKL_INT> ldc_array(batch_size, N);-    std::vector<MKL_INT> group_size(1, batch_size);--    bool bcast_not_supported = false;-#if defined(INTEL_MKL_DNN_ONLY)-    bcast_not_supported = true;-#endif  // INTEL_MKL_DNN_ONLY-    if (std::is_same<Scalar, bfloat16>::value || bcast_not_supported) {-      // DNNL bfloat16 API requires a, b, and c as pointers to tensors-      // represented as flat-byte array.-      const Scalar* a = nullptr;-      const Scalar* b = nullptr;-      Scalar* c = nullptr;-      a = &lhs_reshaped(0, 0, 0);-      b = &rhs_reshaped(0, 0, 0);-      OP_REQUIRES(ctx, !bcast.IsBroadcastingRequired(),-                  errors::Unimplemented("Broadcasting is not supported for "-                                        "_MklBatchMatMul yet."));-      c = &out_reshaped(0, 0, 0);-      // TODO(nhasabni): Use appropriate cast instead of passing addresses of-      // a,b and c.-      MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array,-                        k_array, &a, lda_array, &b, ldb_array, &c, ldc_array, 1,-                        group_size, ctx);-    } else {-      std::vector<const Scalar*> a_array;-      std::vector<const Scalar*> b_array;-      std::vector<Scalar*> c_array;-      a_array.reserve(batch_size);-      b_array.reserve(batch_size);-      c_array.reserve(batch_size);--      if (!bcast.IsBroadcastingRequired()) {-        for (int64 i = 0; i < batch_size; i++) {-          a_array.push_back(&lhs_reshaped(i, 0, 0));-          b_array.push_back(&rhs_reshaped(i, 0, 0));-          c_array.push_back(&out_reshaped(i, 0, 0));-        }-      } else {-        // Broadcasting is needed, so get the mapping from flattened output-        // batch indices to x's and y's flattened batch indices.-        const std::vector<int64>& a_batch_indices = bcast.x_batch_indices();-        const std::vector<int64>& b_batch_indices = bcast.y_batch_indices();--        for (int64 i = 0; i < batch_size; i++) {-          a_array.push_back(&lhs_reshaped(a_batch_indices[i], 0, 0));-          b_array.push_back(&rhs_reshaped(b_batch_indices[i], 0, 0));-          c_array.push_back(&out_reshaped(i, 0, 0));-        }-      }--      // MKL CBLAS API requires a, b, and c as array of pointers, where each-      // pointer is to 2D matrix.-      MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array,-                        k_array, &a_array[0], lda_array, &b_array[0], ldb_array,-                        &c_array[0], ldc_array, 1, group_size, ctx);-    }+    // Compute parameters for DNNL matmul primitive.+    auto params = CreateMatMulParams(lhs.shape(), rhs.shape(), out_shape);+    // Create or retrieve matmul primitive from cache.+    MklMatMulPrimitive<Scalar>* matmul_prim =+        MklMatMulPrimitiveFactory<Scalar>::Get(+            *params, false /* value for do_not_cache */);+    // Execute matmul primitive.+    std::shared_ptr<stream> cpu_stream;+    cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine()));+    matmul_prim->Execute(lhs.flat<Scalar>().data(), rhs.flat<Scalar>().data(),+                         out->flat<Scalar>().data(), cpu_stream);   }   private:   bool adj_x_;   bool adj_y_;   BatchMatMulV2Op<CPUDevice, Scalar> eigen_batch_mm_v2_; -  void MklCblasGemmBatch(-      const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,-      const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,-      const std::vector<MKL_INT>& K_Array, const float** A_Array,-      const std::vector<MKL_INT>& lda_Array, const float** B_Array,-      const std::vector<MKL_INT>& ldb_Array, float** C_Array,-      const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,-      const std::vector<MKL_INT>& group_size, OpKernelContext* ctx) {-#if !defined(INTEL_MKL_DNN_ONLY)-    std::vector<CBLAS_TRANSPOSE> TransA_Array(-        group_size[0], TransA ? CblasTrans : CblasNoTrans);-    std::vector<CBLAS_TRANSPOSE> TransB_Array(-        group_size[0], TransB ? CblasTrans : CblasNoTrans);-    std::vector<float> alpha_Array(group_size[0], 1.0);-    std::vector<float> beta_Array(group_size[0], 0.0);-    cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0],-                      &N_Array[0], &K_Array[0], &alpha_Array[0],-                      reinterpret_cast<const float**>(A_Array), &lda_Array[0],-                      reinterpret_cast<const float**>(B_Array), &ldb_Array[0],-                      &beta_Array[0], reinterpret_cast<float**>(C_Array),-                      &ldc_Array[0], group_count, &group_size[0]);-#else-    DCHECK(Layout == CblasRowMajor);-    std::vector<bool> TransA_Array(group_size[0], TransA);-    std::vector<bool> TransB_Array(group_size[0], TransB);-    std::vector<float> alpha_Array(group_size[0], 1.0);-    std::vector<float> beta_Array(group_size[0], 0.0);-    dnnl_gemm_batch<float>(TransA_Array, TransB_Array, M_Array, N_Array,-                           K_Array, alpha_Array, *A_Array, *B_Array, beta_Array,-                           *C_Array, group_count, group_size, ctx);-#endif  // !INTEL_MKL_DNN_ONLY+  using dims = dnnl::memory::dims;++  // This method makes the rank (ndims) of input same as the output by creating+  // new axes to the input. For example, if input shape is [a, b, c, d] and+  // output shape is [e, f, g, h, i, j], then the reshaped input would have a+  // shape of [1, 1, a, b, c, d].+  void ExpandInputDimsToOutputShape(const TensorShape& input_shape,+                                    const TensorShape& output_shape,+                                    dims* reshaped_dims) {+    auto ndims_input = input_shape.dims();+    auto ndims_output = output_shape.dims();+    auto dim_offset = ndims_output - ndims_input;+    DCHECK(dim_offset > 0);+    reshaped_dims->clear();+    reshaped_dims->resize(ndims_output, 1);+    auto input_dims = input_shape.dim_sizes();+    for (int dim_idx = 0; dim_idx < ndims_input; ++dim_idx)+      reshaped_dims->at(dim_idx + dim_offset) = input_dims[dim_idx];   }-// BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards.-#if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16)-  void MklCblasGemmBatch(-      const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,-      const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,-      const std::vector<MKL_INT>& K_Array, const bfloat16** A_Array,-      const std::vector<MKL_INT>& lda_Array, const bfloat16** B_Array,-      const std::vector<MKL_INT>& ldb_Array, bfloat16** C_Array,-      const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,-      const std::vector<MKL_INT>& group_size, OpKernelContext* ctx) {-    DCHECK(Layout == CblasRowMajor);-    std::vector<bool> TransA_Array(group_size[0], TransA);-    std::vector<bool> TransB_Array(group_size[0], TransB);-    std::vector<float> alpha_Array(group_size[0], 1.0);-    std::vector<float> beta_Array(group_size[0], 0.0);-    // TODO(nhasabni): Remove *A when we pass a, b, and c correctly.-    // MKLDNN API does not require lda, ldb, and ldc.-    dnnl_gemm_batch<bfloat16>(-        TransA_Array, TransB_Array, M_Array, N_Array, K_Array, alpha_Array,-        *A_Array, *B_Array, beta_Array, *C_Array, group_count, group_size, ctx);++  std::unique_ptr<MklMatMulParams> CreateMatMulParams(+      const TensorShape& lhs_shape, const TensorShape& rhs_shape,+      const TensorShape& out_shape) {+    const auto ndims_lhs = lhs_shape.dims();+    const auto ndims_rhs = rhs_shape.dims();+    const auto ndims_out = out_shape.dims();+    auto lhs_dims = TFShapeToMklDnnDims(lhs_shape);+    auto rhs_dims = TFShapeToMklDnnDims(rhs_shape);+    auto out_dims = TFShapeToMklDnnDims(out_shape);++    // DNNL matmul_primitive requires ranks of inputs and output to be same.+    // Create dnnl::memory::dims for inputs and output of same rank.+    // It is assumed here that MatMulBCast object creates output_batch_shape as+    // a conforming superset of input batch shapes, i.e., ndims_out >=+    // ndims_lhs and ndims_out >= ndims_lhs.

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent
PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast and update oneDNN to v1.6.4.

 class BatchMatMulMkl : public OpKernel {       return;     } -    auto rhs_reshaped = rhs.template flat_inner_dims<Scalar, 3>();-    auto lhs_reshaped = lhs.template flat_inner_dims<Scalar, 3>();-    auto out_reshaped = out->template flat_inner_dims<Scalar, 3>();-    const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1);-    const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2);-    const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2);--    std::vector<MKL_INT> m_array(batch_size, M);-    std::vector<MKL_INT> n_array(batch_size, N);-    std::vector<MKL_INT> k_array(batch_size, K);-    std::vector<MKL_INT> lda_array(batch_size, adj_x_ ? M : K);-    std::vector<MKL_INT> ldb_array(batch_size, adj_y_ ? K : N);-    std::vector<MKL_INT> ldc_array(batch_size, N);-    std::vector<MKL_INT> group_size(1, batch_size);--    bool bcast_not_supported = false;-#if defined(INTEL_MKL_DNN_ONLY)-    bcast_not_supported = true;-#endif  // INTEL_MKL_DNN_ONLY-    if (std::is_same<Scalar, bfloat16>::value || bcast_not_supported) {-      // DNNL bfloat16 API requires a, b, and c as pointers to tensors-      // represented as flat-byte array.-      const Scalar* a = nullptr;-      const Scalar* b = nullptr;-      Scalar* c = nullptr;-      a = &lhs_reshaped(0, 0, 0);-      b = &rhs_reshaped(0, 0, 0);-      OP_REQUIRES(ctx, !bcast.IsBroadcastingRequired(),-                  errors::Unimplemented("Broadcasting is not supported for "-                                        "_MklBatchMatMul yet."));-      c = &out_reshaped(0, 0, 0);-      // TODO(nhasabni): Use appropriate cast instead of passing addresses of-      // a,b and c.-      MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array,-                        k_array, &a, lda_array, &b, ldb_array, &c, ldc_array, 1,-                        group_size, ctx);-    } else {-      std::vector<const Scalar*> a_array;-      std::vector<const Scalar*> b_array;-      std::vector<Scalar*> c_array;-      a_array.reserve(batch_size);-      b_array.reserve(batch_size);-      c_array.reserve(batch_size);--      if (!bcast.IsBroadcastingRequired()) {-        for (int64 i = 0; i < batch_size; i++) {-          a_array.push_back(&lhs_reshaped(i, 0, 0));-          b_array.push_back(&rhs_reshaped(i, 0, 0));-          c_array.push_back(&out_reshaped(i, 0, 0));-        }-      } else {-        // Broadcasting is needed, so get the mapping from flattened output-        // batch indices to x's and y's flattened batch indices.-        const std::vector<int64>& a_batch_indices = bcast.x_batch_indices();-        const std::vector<int64>& b_batch_indices = bcast.y_batch_indices();--        for (int64 i = 0; i < batch_size; i++) {-          a_array.push_back(&lhs_reshaped(a_batch_indices[i], 0, 0));-          b_array.push_back(&rhs_reshaped(b_batch_indices[i], 0, 0));-          c_array.push_back(&out_reshaped(i, 0, 0));-        }-      }--      // MKL CBLAS API requires a, b, and c as array of pointers, where each-      // pointer is to 2D matrix.-      MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array,-                        k_array, &a_array[0], lda_array, &b_array[0], ldb_array,-                        &c_array[0], ldc_array, 1, group_size, ctx);-    }+    // Compute parameters for DNNL matmul primitive.+    auto params = CreateMatMulParams(lhs.shape(), rhs.shape(), out_shape);+    // Create or retrieve matmul primitive from cache.+    MklMatMulPrimitive<Scalar>* matmul_prim =+        MklMatMulPrimitiveFactory<Scalar>::Get(+            *params, false /* value for do_not_cache */);+    // Execute matmul primitive.+    std::shared_ptr<stream> cpu_stream;+    cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine()));+    matmul_prim->Execute(lhs.flat<Scalar>().data(), rhs.flat<Scalar>().data(),+                         out->flat<Scalar>().data(), cpu_stream);   }   private:   bool adj_x_;   bool adj_y_;   BatchMatMulV2Op<CPUDevice, Scalar> eigen_batch_mm_v2_; -  void MklCblasGemmBatch(-      const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,-      const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,-      const std::vector<MKL_INT>& K_Array, const float** A_Array,-      const std::vector<MKL_INT>& lda_Array, const float** B_Array,-      const std::vector<MKL_INT>& ldb_Array, float** C_Array,-      const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,-      const std::vector<MKL_INT>& group_size, OpKernelContext* ctx) {-#if !defined(INTEL_MKL_DNN_ONLY)-    std::vector<CBLAS_TRANSPOSE> TransA_Array(-        group_size[0], TransA ? CblasTrans : CblasNoTrans);-    std::vector<CBLAS_TRANSPOSE> TransB_Array(-        group_size[0], TransB ? CblasTrans : CblasNoTrans);-    std::vector<float> alpha_Array(group_size[0], 1.0);-    std::vector<float> beta_Array(group_size[0], 0.0);-    cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0],-                      &N_Array[0], &K_Array[0], &alpha_Array[0],-                      reinterpret_cast<const float**>(A_Array), &lda_Array[0],-                      reinterpret_cast<const float**>(B_Array), &ldb_Array[0],-                      &beta_Array[0], reinterpret_cast<float**>(C_Array),-                      &ldc_Array[0], group_count, &group_size[0]);-#else-    DCHECK(Layout == CblasRowMajor);-    std::vector<bool> TransA_Array(group_size[0], TransA);-    std::vector<bool> TransB_Array(group_size[0], TransB);-    std::vector<float> alpha_Array(group_size[0], 1.0);-    std::vector<float> beta_Array(group_size[0], 0.0);-    dnnl_gemm_batch<float>(TransA_Array, TransB_Array, M_Array, N_Array,-                           K_Array, alpha_Array, *A_Array, *B_Array, beta_Array,-                           *C_Array, group_count, group_size, ctx);-#endif  // !INTEL_MKL_DNN_ONLY+  using dims = dnnl::memory::dims;++  // This method makes the rank (ndims) of input same as the output by creating+  // new axes to the input. For example, if input shape is [a, b, c, d] and

Done

mdfaijul

comment created time in 22 days

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast and update oneDNN to v1.6.4.

 class BatchMatMulMkl : public OpKernel {      out_shape.AddDim(lhs_rows);     out_shape.AddDim(rhs_cols);+    // The maximum number of dimensions for a tensor in DNNL is 12.+    OP_REQUIRES(ctx, out_shape.dims() <= 12,+                errors::InvalidArgument(+                    "Rank of output tensor is required as <= 12, ", "but is ",

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast and update oneDNN to v1.6.4.

 limitations under the License.  // See docs in ../ops/math_ops.cc. -// This file uses both oneDNN and MKL CBLAS batched xGEMM for acceleration of-// Batch Matrix-Matrix Multiplication (MatMul) operations.-// We currently register this kernel only for oneDNN supported data-// types (float, bfloat16). This file can be built with and without the use of-// the binary MKL CBLAS calls, controlled by the macro INTEL_MKL_DNN_ONLY.-// If INTEL_MKL_DNN_ONLY is defined, only oneDNN is used. For cases not-// supported by oneDNN (ex. Batchmatmul with broadcasting) we fall back to the-// default CPU implementation.-// if INTEL_MKL_DNN_ONLY is not defined, both oneDNN and MKL CBLAS-// implementations are used. This is only temporary, once we are able handle all-// cases with oneDNN, CBLAS calls will be removed.+// This file uses oneDNN library for acceleration of Batch Matrix-Matrix+// Multiplication (MatMul) operations. We currently register this kernel only+// for oneDNN supported data types (float, bfloat16). The maximum number of+// dimensions (rank) for output tensor is 12 in oneDNN. If output tensor rank+// exceeds 12, we fallback to Eigen library based kernel.

Done.

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast and update oneDNN to v1.6.4.

@penpornk Thanks for the review and great suggestions. I have addressed them. Please check.

mdfaijul

comment created time in 22 days

push eventIntel-tensorflow/tensorflow

mdfaijul

commit sha ce41ea78005d839d25b2b4a7da03b40f3545352a

Addressed review comments.

view details

push time in 22 days

pull request commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

@penpornk Thanks a lot for the valuable comments. I have addressed them. Please check.

mdfaijul

comment created time in 22 days

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [+        113377, 125570, 77305, 86738, 19433, 22226, 60681,+        70722, 36291, 43718, 7143, 9206, 9785, 12098,+        4783, 6366, 779, 1134]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddLast(self):+    expected_output = [+        1.907175e+06, 2.253505e+06, 7.809210e+05, 9.537180e+05,+        1.184170e+05, 1.523070e+05, 5.367010e+05, 6.803700e+05,+        1.867090e+05, 2.529460e+05, 2.362300e+04, 3.522600e+04,+        5.121700e+04, 7.168300e+04, 1.494300e+04, 2.347400e+04,+        1.558000e+03, 2.903000e+03]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv2, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv1])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddFirst(self):+    expected_output = [+        176161, 194450, 120673, 134822, 30545, 34734, 96041,+        111102, 58149, 69289, 11745, 14839, 15833, 19302,+        7965, 10339, 1345, 1877]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    relu = nn_ops.relu(add)+    output = math_ops.add_n([relu, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(output).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndNoDependence(self):+    expected_output = [+        176161, 194450, 120673, 134822, 30545, 34734, 96041,+        111102, 58149, 69289, 11745, 14839, 15833, 19302,+        7965, 10339, 1345, 1877]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    relu1 = nn_ops.relu(add)+    relu2 = nn_ops.relu(conv2)+    output = math_ops.add_n([relu1, relu2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(output).reshape(-1))+++  @test_util.deprecated_graph_mode_only+  def testAddWithSameSrcAndAddTensorBuffer(self):

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [+        113377, 125570, 77305, 86738, 19433, 22226, 60681,+        70722, 36291, 43718, 7143, 9206, 9785, 12098,+        4783, 6366, 779, 1134]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddLast(self):+    expected_output = [+        1.907175e+06, 2.253505e+06, 7.809210e+05, 9.537180e+05,+        1.184170e+05, 1.523070e+05, 5.367010e+05, 6.803700e+05,+        1.867090e+05, 2.529460e+05, 2.362300e+04, 3.522600e+04,+        5.121700e+04, 7.168300e+04, 1.494300e+04, 2.347400e+04,+        1.558000e+03, 2.903000e+03]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv2, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv1])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddFirst(self):+    expected_output = [+        176161, 194450, 120673, 134822, 30545, 34734, 96041,+        111102, 58149, 69289, 11745, 14839, 15833, 19302,+        7965, 10339, 1345, 1877]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    relu = nn_ops.relu(add)+    output = math_ops.add_n([relu, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(output).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndNoDependence(self):

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [+        113377, 125570, 77305, 86738, 19433, 22226, 60681,+        70722, 36291, 43718, 7143, 9206, 9785, 12098,+        4783, 6366, 779, 1134]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddLast(self):+    expected_output = [+        1.907175e+06, 2.253505e+06, 7.809210e+05, 9.537180e+05,+        1.184170e+05, 1.523070e+05, 5.367010e+05, 6.803700e+05,+        1.867090e+05, 2.529460e+05, 2.362300e+04, 3.522600e+04,+        5.121700e+04, 7.168300e+04, 1.494300e+04, 2.347400e+04,+        1.558000e+03, 2.903000e+03]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv2, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv1])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddFirst(self):

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [+        113377, 125570, 77305, 86738, 19433, 22226, 60681,+        70722, 36291, 43718, 7143, 9206, 9785, 12098,+        4783, 6366, 779, 1134]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1++    conv1 = self._CreateConv2D(x, filter_in)+    conv2 = self._CreateConv2D(conv1, filter_in + ofs)++    conv = self._CreateConv2D(conv1, filter_in - ofs)+    bias_add = nn_ops.bias_add(conv, bias_in)+    add = math_ops.add_n([bias_add, conv2])++    self.assertAllEqual(+        np.rint(expected_output),+        self.evaluate(add).reshape(-1))++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountTwoAndRunAddLast(self):

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only+  def testAddWithRefCountOne(self):+    expected_output = [+        113377, 125570, 77305, 86738, 19433, 22226, 60681,+        70722, 36291, 43718, 7143, 9206, 9785, 12098,+        4783, 6366, 779, 1134]+    tensor_in_sizes = [1, 3, 3, 2]+    filter_in_sizes = [2, 2, 2, 2]+    bias_in_sizes = [2]++    x = self._CreateNumpyTensor(tensor_in_sizes)+    filter_in = self._CreateNumpyTensor(filter_in_sizes)+    bias_in = self._CreateNumpyTensor(bias_in_sizes)+    # To get different weights for filter+    ofs = 1

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)++  def _CreateConv2D(self, input_values, filters,+                    strides=[1, 1], padding="SAME"):+    return nn_ops.convolution(+              input_values,+              filters,+              strides=strides,+              padding=padding)++  @test_util.deprecated_graph_mode_only

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 def Test(self):   return Test  +class FusedConv2DTest(test.TestCase):++  def _CreateNumpyTensor(self, shape):+    total_size = 1+    for s in shape:+      total_size *= s+    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 inline void SetDummyMklDnnShapeOutput(OpKernelContext* context,   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output); } -inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,+// If the input tensor has ref count as 1, it is forwarded to the desired+// output port and the function reutrns true. In that case, it also allocates

Done

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

Pull request review commenttensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

 inline void SetDummyMklDnnShapeOutput(OpKernelContext* context,   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output); } -inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,+// If the input tensor has ref count as 1, it is forwarded to the desired+// output port and the function reutrns true. In that case, it also allocates+// the serialized MklDnnShape object. Otherwise, the function returns false.+inline bool ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,                                                 int idx_in, int idx_out,-                                                const MklDnnShape& mkl_shape) {+                                                Tensor** output,+                                                const MklDnnShape& mkl_shape,+                                                bool always_forward = true) {   int num_inputs = context->num_inputs();   int num_outputs = context->num_outputs();   int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);   int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);--  AllocateOutputSetMklShape(context, idx_out, mkl_shape);--  if (IsRefType(context->input_dtype(idx_data_in))) {-    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);+  bool is_forwarded = false;+  const Tensor& input_tensor = context->input(idx_data_in);+  const auto output_shape = input_tensor.shape();+  if (always_forward) {+    if (IsRefType(context->input_dtype(idx_data_in))) {+      context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);+    } else {+      context->set_output(idx_data_out, input_tensor);+    }   } else {-    context->set_output(idx_data_out, context->input(idx_data_in));+    is_forwarded = context->forward_input_to_output_with_shape(+        idx_data_in, idx_data_out, output_shape, output);+  }+  if (is_forwarded || always_forward) {+    AllocateOutputSetMklShape(context, idx_out, mkl_shape);+    return true;+  } else {+    return false;   }

Done.

mdfaijul

comment created time in 22 days

PullRequestReviewEvent

push eventIntel-tensorflow/tensorflow

mdfaijul

commit sha c53f7a7b1b386b4031d27dcd198c203921e9eda8

Addressed comments.

view details

push time in 22 days

PR opened tensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast.

This PR enables new features from DNNL to perform BatchMatMul with broadcast. Current support is up to rank 12 tensors. Earlier, the BatchMatMul op was using cblas api from Intel Math Kernel Library(MKL).

+101 -224

0 comment

4 changed files

pr created time in a month

push eventIntel-tensorflow/tensorflow

mdfaijul

commit sha 819c4dc8133f138738e0f366b3a2c490c268eb60

Enabled DNNL support for BatchMatMul with broadcast.

view details

push time in a month

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast.

Submitted from wrong email.

mdfaijul

comment created time in a month

PR closed tensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast. cla: no size:L

This PR enables new features from DNNL to perform BatchMatMul with broadcast. Current support is up to rank 12 tensors. Earlier, the BatchMatMul op was using cblas api from Intel Math Kernel Library(MKL).

+101 -224

1 comment

4 changed files

mdfaijul

pr closed time in a month

PR opened tensorflow/tensorflow

[INTEL MKL] Enable DNNL BatchMatMul support with broadcast.

This PR enables new features from DNNL to perform BatchMatMul with broadcast. Current support is up to rank 12 tensors. Earlier, the BatchMatMul op was using cblas api from Intel Math Kernel Library(MKL).

+101 -224

0 comment

4 changed files

pr created time in a month

create barnchIntel-tensorflow/tensorflow

branch : amin/dnnl-batchmatmul

created branch time in a month

PR opened tensorflow/tensorflow

[INTEL MKL] Bug-fix to in-place computation with tensor forwarding.

This PR fixes a bug in a fused operator (conv2d + bias_add + add + relu), wherein one of the inputs to add is used as in-place buffer for output result. Previous code was forcefully forwarding the input to the output, although the input could have more than one fan-outs.

+197 -17

0 comment

3 changed files

pr created time in a month

create barnchIntel-tensorflow/tensorflow

branch : amin/inplace-fix

created branch time in a month

delete branch Intel-tensorflow/tensorflow

delete branch : amin/upgrade-to-dnnl1.2.2

delete time in 2 months

delete branch Intel-tensorflow/tensorflow

delete branch : amin/nchw-nhwc-grappler-part1

delete time in 2 months

delete branch Intel-tensorflow/tensorflow

delete branch : amin/nchw-dataformat-remapper

delete time in 2 months

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1

@gbaned @ezhulenev @andyly I had some CLA issues in #39760 while I was fixing GPU failure. This PR is identical to old one.

mdfaijul

comment created time in 2 months

push eventIntel-tensorflow/tensorflow

mdfaijul

commit sha 214e762929d88b1210e38706220e580248b2db7d

Style fix.

view details

push time in 2 months

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1.

@gbaned @ezhulenev When committed a fix to the GPU failure from another machine where git was not setup well, I got CLA issues. The PR #42448 is identical to this closed one except with small fix to the GPU failure. Sorry for the inconvenience.

mdfaijul

comment created time in 2 months

PR opened tensorflow/tensorflow

[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1

This is an already reviewed PR #39760 with a fix to GPU failure.

This PR enables layout (data format) conversion from NCHW to NHWC on CPU. This is useful (1) when a model is trained on GPU and later doing inference/fine-tuning on CPU and (2) helps quantizing NCHW trained model for CPU.

To make the PR small, we have included a few unit tests in this part1. More unit tests will follow on future PRs.

+186 -91

0 comment

6 changed files

pr created time in 2 months

push eventIntel-tensorflow/tensorflow

mdfaijul

commit sha 898624778b083ddf903b1086aa1fc1e84a438c5b

NCHW to NHWC conversion on CPU.

view details

push time in 2 months

PR closed tensorflow/tensorflow

Reviewers
[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1. cla: no comp:grappler size:L stat:awaiting response

This PR enables layout (data format) conversion from NCHW to NHWC on CPU. This is useful (1) when a model is trained on GPU and later doing inference/fine-tuning on CPU and (2) helps quantizing NCHW trained model for CPU.

To make the PR small, we have included a few unit tests in this part1. More unit tests will follow on future PRs.

+0 -0

11 comments

0 changed file

mdfaijul

pr closed time in 2 months

push eventIntel-tensorflow/tensorflow

push time in 2 months

push eventIntel-tensorflow/tensorflow

md.faijul.amin

commit sha b0beaea3ad36acbf21b40a49b515689caeda12cf

Fixed test

view details

push time in 2 months

push eventIntel-tensorflow/tensorflow

Andy Ly

commit sha 37a0da627ce2b8075c77d6ef576315af7ed8f179

Fix GuaranteeAllFuncsOneUse to only clone a function after checking if the limit for cloning is reached. This fixes a bug determined via ASAN. PiperOrigin-RevId: 324540320 Change-Id: I607082ed26198f70c33b86267105f5271fcdd270

view details

Yuefeng Zhou

commit sha 151bd5901aad789d309697ea4ea634f430145896

Cancel in-flight closures when there is an error. PiperOrigin-RevId: 324542620 Change-Id: I1d6cddf8130df74f00ce7b0a3b6b84f553990e78

view details

Yuanzhong Xu

commit sha 1cb7ce30b747bc0afb301a4ee3af3dbd0e23be85

Explicitly disable SPMD in TPU strategy. Mirrored variables are not yet supported for SPMD. This is in preparation of turning SPMD by default. PiperOrigin-RevId: 324548129 Change-Id: Ie7adf563402bd5ef31b7759232b1cd8f441586c7

view details

TensorFlower Gardener

commit sha 1e9b9b1568d550e6779d2ddd5d193968254d3029

Merge pull request #40693 from yongtang:40653-tf.math.segment_mean-error-message PiperOrigin-RevId: 324550275 Change-Id: I0e6350d251bdff3603cc9f0ff3957edfce6e0a0f

view details

A. Unique TensorFlower

commit sha d201be6284693c9ac5b93bdccfeeac2524d05239

Minor improvement, saying why the object is invalid. PiperOrigin-RevId: 324556545 Change-Id: Ieb0728e400ff2c220d07a29fecb40c37070d2f08

view details

Alexander Belyaev

commit sha 82126e56ddb0c76330290e09815ec4240bef8bd3

[MLIR][KERNEL_GEN] Legalize TF Framework dialect to LLVM. PiperOrigin-RevId: 324559430 Change-Id: I1685be7f2aace9cf9658fe05574cf957ee67bd37

view details

A. Unique TensorFlower

commit sha c730b889e20569833c50c5aa224a943fc92027b0

Update GraphDef version to 482. PiperOrigin-RevId: 324562958 Change-Id: Ie80c4ff6b968b2281711d8437fcdde7205c22518

view details

A. Unique TensorFlower

commit sha 71329959e563a6278a31d45029b8b4a86642aad2

compat: Update forward compatibility horizon to 2020-08-03 PiperOrigin-RevId: 324562959 Change-Id: I688798867a67903d517e9402be2441a1657c24da

view details

Alexander Belyaev

commit sha 4305e83f8cc65d9bf941b7136859fc9397368da6

[MLIR][KERNEL_GEN] Use TF_FRAMEWORK_TYPE instead of PRIVATE_EXPERIMENTAL_0. This is just a clean up. PiperOrigin-RevId: 324567279 Change-Id: I1a14ff07b60ed24baf94ef8c544f175f88e1dc89

view details

Stephan Herhut

commit sha 123af9ba8605aedeb11c9f1eca0a3e7c64f324c2

Also allow older compute capabilities 32 and 30 when generating ptx. PiperOrigin-RevId: 324575272 Change-Id: I1a9a49edb55b36cb971ad713e070ce1ee58b34fb

view details

A. Unique TensorFlower

commit sha 8e9f3196fd8841de83bd6a622df696ea191d1d78

Added a bunch of unary ops to the estimator. PiperOrigin-RevId: 324607213 Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe

view details

Mihai Maruseac

commit sha fbb9c59ab276664ba1a3c09adbe5f2d397c71ea4

Update tensorflow/python/ops/custom_gradient.py

view details

Mihai Maruseac

commit sha 86927dd8ddc90ffc5a9bdfb4f8b48ecf76e8aa44

Update tensorflow/python/ops/custom_gradient.py

view details

Mihai Maruseac

commit sha 84ad9375524def803dc0b1e6470688d893f8cf1b

Update tensorflow/python/ops/custom_gradient.py

view details

Mihai Maruseac

commit sha cad412bfcbaea7952e7758620e3928f64d83be32

Update tensorflow/python/ops/math_ops.py

view details

Andy Ly

commit sha 3ccad31d3963077b9879c9e18921e32213b9c32f

Update copyright in GuaranteeAllFuncsOneUse to be of TensorFlow and update function names to match Google C++ Style Guide (NFC). PiperOrigin-RevId: 324622347 Change-Id: Ib1d19560afa26e4fef197b1694328e26f53ade8e

view details

A. Unique TensorFlower

commit sha 856dc4f7b6b1c4b35142961e432a5fa66c6d1259

Add MLIR definition for StatelessTruncatedNormalOp. PiperOrigin-RevId: 324625125 Change-Id: Ia17f1179c18c509b60427765134c377be3aef403

view details

Francois Chollet

commit sha 59dc165d26caaed925bcfe7b40752b8429d922ea

Enable input spec checking for Functional models. PiperOrigin-RevId: 324625967 Change-Id: Ide0a8cb4d6d7614f86f22088a5ef95d72636c54e

view details

Andy Ly

commit sha 60c8033ebc325550cae779c393f9e4c73108a75e

Add support for token operands to mhlo.tuple. mhlo.get_tuple_element supports extracting a mhlo.token type from a tuple. This updates the creation of tuples to allow for mhlo.token typed operands. PiperOrigin-RevId: 324628663 Change-Id: I18c77aabdfcb2d84ae70d49e85a52d751bc962c2

view details

Robert David

commit sha 74d526257013ee74c34e6e48cd52e650b4bde6ec

Use workgroup-local reductions for MeanStdDevNormalization. PiperOrigin-RevId: 324630237 Change-Id: Ie0fe32a072039809b7b1b51bbeda8665e7f1a5ce

view details

push time in 2 months

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1.

@gbaned Thanks. I am looking into the error. Will get back to you shortly.

mdfaijul

comment created time in 2 months

pull request commenttensorflow/tensorflow

[INTEL MKL] Enable NCHW to NHWC conversion for CPU - part1.

@ezhulenev @gbaned Did you have chance to look into the updates in response to review comments.

mdfaijul

comment created time in 3 months

more