profile
viewpoint

CODAIT/deep-histopath 85

A deep learning approach to predicting breast tumor proliferation scores for the TUPAC16 challenge

CODAIT/graph_def_editor 12

GraphDef Editor: A port of the TensorFlow contrib.graph_editor package that operates over serialized graphs

feihugis/CF-2 1

Group Repo

feihugis/academicpages.github.io 0

Github Pages template for academic personal websites, forked from mmistakes/minimal-mistakes

feihugis/addons 0

Useful extra functionality for TensorFlow 2.0 maintained by SIG-addons

feihugis/awesome-deep-vision 0

A curated list of deep learning resources for computer vision

feihugis/awesome-nlp 0

:book: A curated list of resources dedicated to Natural Language Processing (NLP)

started toddhollon/srh_cnn

started time in 15 hours

issue comment tensorflow/addons

Benchmark activations using different implementations.

TensorFlow-core has some benchmarking as well (e.g. https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/data/benchmarks/list_files_benchmark.py). Maybe we can build a similar module to provide the benchmarking utilities for the operations/kernels/functions in tf-addons.

gabrieldemarmiesse

comment created time in 17 hours

started AlibabaPAI/DAPPLE

started time in 2 days

push event IBM/MAX-Nucleus-Segmenter

Brendan Dwyer

commit sha 235babf4338dea4bb078c9c402a22a16912799a6

bump MAX Base

view details

Brendan Dwyer

commit sha e472416002aa0b0669c8832023a46f97f70d2d8d

bump h5py

view details

push time in 3 days

started hawkinsp/ZTopo

started time in 7 days

Pull request review comment tensorflow/tensorflow

[Features] DLPack functions

+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.+#+# Licensed under the Apache License, Version 2.0 (the "License");+# you may not use this file except in compliance with the License.+# You may obtain a copy of the License at+#+#     http://www.apache.org/licenses/LICENSE-2.0+#+# Unless required by applicable law or agreed to in writing, software+# distributed under the License is distributed on an "AS IS" BASIS,+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.+# See the License for the specific language governing permissions and+# limitations under the License.+# ==============================================================================++from tensorflow.python import pywrap_tfe+from tensorflow.python.util.tf_export import tf_export++@tf_export("dlpack.to_dlpack")+def to_dlpack(tf_tensor):+    return pywrap_tfe.TFE_ToDlpackCapsule(tf_tensor)++@tf_export("dlpack.from_dlpack")+def from_dlpack(dlcapsule):+    return pywrap_tfe.TFE_FromDlpackCapsule(dlcapsule)

nitpick: miss the end line

VoVAllen

comment created time in 7 days

Pull request review comment tensorflow/tensorflow

[Features] DLPack functions

+load("//tensorflow:tensorflow.bzl", "tf_py_test")+load("//tensorflow:tensorflow.bzl", "cuda_py_test")++py_library(+    name = "dlpack",+    srcs = ["dlpack.py"],+    deps = [+    ], +    srcs_version = "PY3",+)++cuda_py_test(+    name = "dlpack_test",+    srcs = ["dlpack_test.py"],+    python_version = "PY3",+    deps = [+        ":dlpack",      +        "//tensorflow/python/eager:test",+        "@absl_py//absl/testing:absltest",+        "@absl_py//absl/testing:parameterized",+    ]+)

nitpick: miss the end line

VoVAllen

comment created time in 7 days

Pull request review comment tensorflow/tensorflow

[Features] DLPack functions

+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.++Licensed under the Apache License, Version 2.0 (the "License");+you may not use this file except in compliance with the License.+You may obtain a copy of the License at++    http://www.apache.org/licenses/LICENSE-2.0++Unless required by applicable law or agreed to in writing, software+distributed under the License is distributed on an "AS IS" BASIS,+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.+See the License for the specific language governing permissions and+limitations under the License.+==============================================================================*/++#include "tensorflow/c/eager/dlpack.h"+#include "include/dlpack/dlpack.h"  // TF:dlpack+#include "tensorflow/c/eager/c_api_internal.h"+#include "tensorflow/c/tf_status_helper.h"+#include "tensorflow/core/framework/tensor.h"+#include "tensorflow/core/platform/casts.h"++#include "tensorflow/core/framework/tensor_reference.h"+#include "tensorflow/core/platform/logging.h"++namespace tensorflow {++namespace {++struct TFDLManagedTensorCtx {+  TensorReference* handle;+  DLManagedTensor tensor;+};++const Tensor* GetTensorFromHandle(TFE_TensorHandle* h, TF_Status* status) {+  if (h == nullptr || !h->handle->IsValid(&status->status)) {+    status->status = tensorflow::errors::InvalidArgument(+        "The passed in handle is a nullptr");+    return nullptr;+  }+  tensorflow::TensorHandle* handle =+      tensorflow::down_cast<tensorflow::TensorHandleInterface*>(h->handle.get())+          ->Handle();++  if (handle->IsRemote()) {+    status->status = tensorflow::errors::InvalidArgument(+        "DLPack doesn't support remote tensor");+    return nullptr;+  }+  const tensorflow::Tensor* tensor;+  status->status = handle->Tensor(&tensor);+  if (!status->status.ok()) {+    return nullptr;+  }+  return tensor;+};++void DLManagedTensorDeleter(DLManagedTensor* arg) {+  TFDLManagedTensorCtx* owner =+      
static_cast<TFDLManagedTensorCtx*>(arg->manager_ctx);+  owner->handle->Unref();+  delete owner;+}++DLDataType GetDLDataType(TF_DataType data_type, TF_Status* status) {+  DLDataType dtype;+  dtype.lanes = 1;+  dtype.bits = TF_DataTypeSize(data_type) * 8;+  switch (data_type) {+    case TF_DataType::TF_HALF:+    case TF_DataType::TF_FLOAT:+    case TF_DataType::TF_DOUBLE:+      dtype.code = DLDataTypeCode::kDLFloat;+      break;+    case TF_DataType::TF_INT8:+    case TF_DataType::TF_INT16:+    case TF_DataType::TF_INT32:+    case TF_DataType::TF_INT64:+      dtype.code = DLDataTypeCode::kDLInt;+      break;+    case TF_DataType::TF_BOOL:+    case TF_DataType::TF_UINT8:+    case TF_DataType::TF_UINT16:+    case TF_DataType::TF_UINT32:+    case TF_DataType::TF_UINT64:+      dtype.code = DLDataTypeCode::kDLUInt;+      break;+    case TF_DataType::TF_BFLOAT16:+      dtype.code = DLDataTypeCode::kDLBfloat;+      break;+    default:+      status->status = tensorflow::errors::InvalidArgument(+          DataType_Name(static_cast<DataType>(data_type)),+          " is not supported by dlpack");+      break;+  }+  return dtype;+}++DLContext GetDLContext(TFE_TensorHandle* h, TF_Status* status) {+  DLContext ctx;+  const char* device_name = h->handle->DeviceName(&status->status);+  DeviceNameUtils::ParsedName parsed_name;+  tensorflow::DeviceNameUtils::ParseFullName(device_name, &parsed_name);+  std::string device_type = parsed_name.type;+  int device_id = -1;+  if (parsed_name.has_id) {+    device_id = parsed_name.id;+  }  // Question: Is it possible that it doens't have id?++  ctx.device_id = device_id;+  if (device_type == "CPU") {+    ctx.device_type = DLDeviceType::kDLCPU;+  } else if (device_type == "GPU") {+    ctx.device_type = DLDeviceType::kDLGPU;+  } else {+    status->status = tensorflow::errors::InvalidArgument(+        "Unsupported Device Type for dlpack");+  }++  return ctx;+}++DLManagedTensor* TFEHandleToTFDLManagedTensorCtx(TFE_TensorHandle* h,+                  
                               TF_Status* status) {+  const Tensor* tensor = GetTensorFromHandle(h, status);+  TF_DataType data_type = static_cast<TF_DataType>(tensor->dtype());+  auto* tf_dlm_tensor_ctx = new TFDLManagedTensorCtx;++  TensorReference* tensor_ref =+      new TensorReference(*tensor);  // This will call buf_->Ref()+  tf_dlm_tensor_ctx->handle = tensor_ref;+  tf_dlm_tensor_ctx->tensor.manager_ctx = tf_dlm_tensor_ctx;+  tf_dlm_tensor_ctx->tensor.deleter = &DLManagedTensorDeleter;+  tf_dlm_tensor_ctx->tensor.dl_tensor.ctx = GetDLContext(h, status);+  int ndim = tensor->dims();+  tf_dlm_tensor_ctx->tensor.dl_tensor.ndim = ndim;+  tf_dlm_tensor_ctx->tensor.dl_tensor.data =+      TFE_TensorHandleDevicePointer(h, status);+  tf_dlm_tensor_ctx->tensor.dl_tensor.dtype = GetDLDataType(data_type, status);++  int64_t* shape_arr = new int64_t[ndim];+  for (int i = 0; i < ndim; i++) {+    shape_arr[i] = tensor->dim_size(i);+  }++  tf_dlm_tensor_ctx->tensor.dl_tensor.shape = shape_arr;++  tf_dlm_tensor_ctx->tensor.dl_tensor.strides = nullptr;+  tf_dlm_tensor_ctx->tensor.dl_tensor.byte_offset =+      0;  // TF doesn't handle the strides and byte_offsets here+  return &tf_dlm_tensor_ctx->tensor;+}++absl::optional<std::string> DeviceNameFromDlContext(const DLContext& ctx,+                                                    TF_Status* status) {+  switch (ctx.device_type) {+    case DLDeviceType::kDLCPU:+      return "CPU:0";+    case DLDeviceType::kDLGPU:+      return absl::StrCat("GPU:", ctx.device_id);+    default:+      return absl::nullopt;+  };+}+TF_DataType TfDataTypeFormDlDataType(const DLDataType& dtype,+                                     TF_Status* status) {+  TF_DataType tf_dtype;+  switch (dtype.code) {+    case DLDataTypeCode::kDLUInt:+      switch (dtype.bits) {+        case 1:+          tf_dtype = TF_DataType::TF_BOOL;+          break;+        case 8:+          tf_dtype = TF_DataType::TF_UINT8;+          break;+        case 16:+          tf_dtype = 
TF_DataType::TF_UINT16;+          break;+        case 32:+          tf_dtype = TF_DataType::TF_UINT32;+          break;+        case 64:+          tf_dtype = TF_DataType::TF_UINT64;+          break;+        default:+          status->status = tensorflow::errors::InvalidArgument(+              "Unsupported UInt bits: ", dtype.bits);+      }+      break;+    case DLDataTypeCode::kDLInt:+      switch (dtype.bits) {+        case 8:+          tf_dtype = TF_DataType::TF_INT8;+          break;+        case 16:+          tf_dtype = TF_DataType::TF_INT16;+          break;+        case 32:+          tf_dtype = TF_DataType::TF_INT32;+          break;+        case 64:+          tf_dtype = TF_DataType::TF_INT64;+          break;+        default:+          status->status = tensorflow::errors::InvalidArgument(+              "Unsupported Int bits: ", dtype.bits);+      }+      break;+    case DLDataTypeCode::kDLFloat:+      switch (dtype.bits) {+        case 16:+          tf_dtype = TF_DataType::TF_HALF;+          break;+        case 32:+          tf_dtype = TF_DataType::TF_FLOAT;+          break;+        case 64:+          tf_dtype = TF_DataType::TF_DOUBLE;+          break;+        default:+          status->status = tensorflow::errors::InvalidArgument(+              "Unsupported Float bits: ", dtype.bits);+      }+      break;+    case DLDataTypeCode::kDLBfloat:+      switch (dtype.bits) {+        case 16:+          tf_dtype = TF_DataType::TF_BFLOAT16;+          break;+        default:+          status->status = tensorflow::errors::InvalidArgument(+              "Unsupported BFloat bits: ", dtype.bits);+      }+      break;+    default:+      status->status = tensorflow::errors::InvalidArgument(+          "Unsupported Type Codes: ", dtype.code);+  }++  return tf_dtype;+}++void DeallocatorWrapperFunc(void* data, size_t len, void* dlmt_vptr) {+  DLManagedTensor* dlmt = static_cast<DLManagedTensor*>(dlmt_vptr);+  dlmt->deleter(const_cast<DLManagedTensor*>(dlmt));+}++}  // 
namespace++void TFE_CallDLManagedTensorDeleter(void* dlm_ptr) {+  DLManagedTensor* dlMTensor = static_cast<DLManagedTensor*>(dlm_ptr);+  if (dlMTensor) {+    dlMTensor->deleter(const_cast<DLManagedTensor*>(dlMTensor));+  }+}++void* TFE_HandleToDLPack(TFE_TensorHandle* h, TF_Status* status) {+  DLManagedTensor* tfdlmtensor = TFEHandleToTFDLManagedTensorCtx(h, status);+  return static_cast<void*>(tfdlmtensor);+}++TFE_TensorHandle* TFE_HandleFromDLPack(void* dlm, TF_Status* status) {+  TFE_ContextOptions* opts = TFE_NewContextOptions();+  TFE_Context* ctx = TFE_NewContext(opts, status);+  DLManagedTensor* dlmt = static_cast<DLManagedTensor*>(dlm);++  absl::optional<std::string> device_name =+      DeviceNameFromDlContext(dlmt->dl_tensor.ctx, status);+  if (!device_name.has_value()) {+    status->status =+        tensorflow::errors::InvalidArgument("Unsupported Device Type");+    return nullptr;+  }+  TF_DataType dtype = TfDataTypeFormDlDataType(dlmt->dl_tensor.dtype, status);+  int num_dims = dlmt->dl_tensor.ndim;+  const int64_t* dims = dlmt->dl_tensor.shape;+  void* data = dlmt->dl_tensor.data;++  size_t total_bytes = dlmt->dl_tensor.dtype.bits / 8;+  for (int i = 0; i < num_dims; i++) {+    total_bytes *= dims[i];+  }+  TFE_TensorHandle* handle = TFE_NewTensorHandleFromDeviceMemory(+      ctx, device_name.value().c_str(), dtype, dims, num_dims, data,+      total_bytes, &DeallocatorWrapperFunc, &dlmt, status);++  return handle;+};++}  // namespace tensorflow

nitpick: miss the end line

VoVAllen

comment created time in 7 days

started deepmind/haiku

started time in 7 days

started tensorflow/cloud

started time in 7 days

push event IBM/MAX-Breast-Cancer-Mitosis-Detector

Brendan Dwyer

commit sha 67abcaaa6067f1c4179d7f114817f135d1216ecc

Bump MAX Base (#40)

view details

push time in 7 days

started dwmkerr/hacker-laws

started time in 10 days

started lutzroeder/netron

started time in 10 days

started baidu-research/DeepBench

started time in 10 days

started deepmind/sonnet

started time in 14 days

CommitCommentEvent

started microsoft/cpprestsdk

started time in 17 days

started FloopCZ/tensorflow_cc

started time in 17 days

push event feihugis/tensorflow

Ayush Dubey

commit sha a88cebf43cd860289a824007f5a0780f810fbdd9

Make NcclManager's error message more descriptive. This should make it easier to tracking down NCCL/CUDA errors in NcclManager. PiperOrigin-RevId: 291253749 Change-Id: I9d349509ee25cc4de5d40a657918e32199bc496b

view details

TensorFlower Gardener

commit sha 9652ec9ec328463550c1b579bd90b9aeafca4143

Merge pull request #36165 from angerson:master PiperOrigin-RevId: 291254453 Change-Id: I0de717de3c9db98cc21d4bc89985bef6a001d9c5

view details

Pete Warden

commit sha adf6e22e4af83afd55e0da3caa7e7959def1e6b6

Enable visualization script to work from pip install Current the visualization script for TensorFlow Lite files only works if you build through Bazel. To make it more accessible, this change uses Flatbuffer's new Python interface to extract the information from the file, rather than calling out to an external tool. It also adds some tests. A lot of the changes here are related to upgrading to the latest version of Flatbuffers, which has an impact on other parts of the code. PiperOrigin-RevId: 291255530 Change-Id: I28a64a182e6197a58926d02c41988e4791c60fce

view details

Shanqing Cai

commit sha 72144ee6f671d768236f41181e320110bafe978a

[tfdbg2] Temporarily disable source_utils_test on Windows PiperOrigin-RevId: 291256022 Change-Id: Ieea82957440924a0d3d827c2788e7f53f497fd24

view details

George Karpenkov

commit sha 4721fbaf2b2e99fe74292d61bfa7b0ba90e2b090

[XLA] Disable tests which do not run in OSS PiperOrigin-RevId: 291256337 Change-Id: Iba9d4a74885b6ca675f5ac3f5aeb3e5b465c72f3

view details

Skye Wanderman-Milne

commit sha aa50d2b624c7e8d56b4b1644c4ccf489d8e8c55c

[XLA:Python] Changes to appease our OSS compiler. In the compiler's defense, structured bindings are a C++17 feature, and TF is supposedly built with C++14 (not sure how these got through before). PiperOrigin-RevId: 291256419 Change-Id: I283fac8b09aaffb4766d6a368b063ca09f3dbcff

view details

Sachin Joglekar

commit sha d855adfc5a0195788bf5f92c3c7352e638aa1109

Adds support for half_pixel_centers in TFLite's resize op. PiperOrigin-RevId: 291256449 Change-Id: Ied0a676cc9afc04ee8a9557f8e3934d82d528c2a

view details

George Karpenkov

commit sha fbf0e3dd90ba4fee4bb768df31b2b2a72b147cc4

Fix OSS build Adds common_target_td_sources, which was previously not defined in the generated BUILD file. PiperOrigin-RevId: 291257367 Change-Id: I50c63f6fb64667d6df9220a035e9f6c49cc799fc

view details

George Karpenkov

commit sha 306dee4096e97520b1218d78c23b1a72941a9508

Enable OSS testing for XLA. PiperOrigin-RevId: 291258425 Change-Id: If8423622b2f44b8e46ffbcd9880735a354a719d3

view details

A. Unique TensorFlower

commit sha 4b17c1073961a70220b1d7f348bfb49ed2c5f6ce

Support Resize operation with two NearestNeighbor and Bilinear params. PiperOrigin-RevId: 291261285 Change-Id: Ia4e20f6766182af9e451a75be23d612d2807a8ab

view details

Henry Tan

commit sha 2c23f12b5a75e3471d491f88b4f813ba67d08273

Updating tpu_client.py to handle backward new parameter num_partitions PiperOrigin-RevId: 291268005 Change-Id: I9367584730e32cc5744df526e36ecd0cf216717b

view details

Raman Sarokin

commit sha caa9ae7c75e4597b8939e79234a344b89bfed042

Renamed z_coord to s_coord in LinkingContext. Z will be used for Depth coord. PiperOrigin-RevId: 291269030 Change-Id: I7c8de757c371da0f280115fa7337d3a96d99042c

view details

Rick Chao

commit sha e07baace55bb6f8439585d30ae3b0448f877b40f

Skip distribute:keras_save_load_test_xla_gpu test while investigating. PiperOrigin-RevId: 291275353 Change-Id: I53c450b1bb686f4ffc0d397e42c549667329544e

view details

A. Unique TensorFlower

commit sha 7cdc2fe8ff18fb3b19e13c52b327238f3ca59bc1

Tune a constant that shows some minor improvements on the sparse op benchmarks. PiperOrigin-RevId: 291275364 Change-Id: Ibd6eea7c1f49483566e102eecc3c9297eda060d2

view details

A. Unique TensorFlower

commit sha d27747e2e4f82d3f9a66e971bedac0732de8b2b9

Create ParallelExecute Op in TF device dialect. PiperOrigin-RevId: 291275744 Change-Id: I9778e24d27a16d66bd8fe27d406f078c3bde51d4

view details

A. Unique TensorFlower

commit sha 54f1004cb638476e5d2268a216a37af01b418b9a

Tune a constant that results in a 20-50% improvement on some benchmarks, modest improvement on others and perf neutral for the rest. PiperOrigin-RevId: 291275748 Change-Id: Ib98bcac0f36bbad91a8403af2b4ed7158d0347c1

view details

Pooya Davoodi

commit sha b1cd057295b081053428e791e99f4d259a0f9f95

Make TrtConversionParams immutable

view details

Scott Zhu

commit sha 147a847bb3ca178cb0b25574958637d1b62e7f53

PR #33441: Fix the memory leak described in Issue #33178 Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/33441 Documented in [THIS](https://github.com/tensorflow/tensorflow/issues/33178) thread. Based on the documentation of get() and generic_utils.object_list_uid, this has no functional effect, except to remove an unnecessary map that was growing with every input. Tested using the example program in [THIS](https://github.com/tensorflow/tensorflow/issues/331... PiperOrigin-RevId: 291277510 Change-Id: I97df3c26850ae460d41e5032bb71edd11c948670

view details

A. Unique TensorFlower

commit sha da479d95301c73fb4c6dccd68fb77cf1546fba64

1. Add OpKernel::Trace to return the trace string for profiler. and use it in Eager and non-Eager executors. 2. Trace input tensors' shape for profiler cost analysis. 3. Polymorphism is used to override for special ops whose cost analysis depends others attributes besides the input shapes. 4. verbose is used to control trace string verbosity, ideally we can use some host trace level to decide if these extra information should be generated. PiperOrigin-RevId: 291278431 Change-Id: I0316a8376519461f9bf0346726cbca812af75da7

view details

A. Unique TensorFlower

commit sha ea1a100b0bd271f873825510ae82dac02312b14b

Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 291283852 Change-Id: Ic5b3f2d8b4854e75f11a0c1e99be444f71f12f24

view details

push time in 21 days

push event feihugis/tensorflow

Fei Hu

commit sha 42548f415e23e4e4d67056f84a102b4f42662026

switch BucketBySequenceLengthTest to use combinations

view details

Fei Hu

commit sha 11bfc2e5fef61f6232e69071eb1839b17b121dda

Switch CopyToDeviceTest to use TF combination

view details

Fei Hu

commit sha 4bc07356527ef23fb1706f16f0526e366927285a

Switch CounterTest to use TF combinations

view details

Fei Hu

commit sha 876279a93f23c9a550fab1ea38aee6b788b96afa

Switch CsvDatasetTest to use TF combinations

view details

Fei Hu

commit sha 1067f41641d111aaea8b915d9fb95cce5a586716

Switch DenseToSparseBatchTest to use TF combinations

view details

Fei Hu

commit sha c8fddc26b63817a79ae276f828d9f7ec82c5f1b2

Switch DirectedInterleaveDatasetTest to use TF combinations

view details

Fei Hu

commit sha 8ba0c3ff55be23e233cc5f5abee0e0e41304a65e

Switch GetSingleElementTest to use TF combinations

view details

Fei Hu

commit sha f898b693721c12702af56a9a7f0212449f13fd2b

Switch GroupByReducerTest to use TF combinations

view details

Fei Hu

commit sha d26b04c4172d30d2c2cc40512b8514540833a16e

Switch GroupByWindowTest to use TF combinations

view details

Fei Hu

commit sha ffa973850cc6dea4a469d1db41970d4915a6e183

Switch IgnoreErrorsTest to use TF combinations

view details

Fei Hu

commit sha f06213b2423a6655f5c5ed00f03401a073dc6b7e

Switch MakeBatchedFeaturesDatasetTest to use TF combinations

view details

Fei Hu

commit sha 86ff3a156666418d1f26dc2bdef3bbe3bd0758d2

Switch MakeCsvDatasetTest to use TF combinations

view details

Fei Hu

commit sha 28418a07dd9e9df418105f82923e9712ba916588

Switch MakeTFRecordDatasetTest to use TF combinations

view details

Fei Hu

commit sha 428fa7ede60c24125817da99642cf9628932445e

Switch MapDefunTest to use TF combinations

view details

Fei Hu

commit sha 2434bfd3a2042e38410eb53e76db7f27477b649b

Switch OverrideThreadpoolTest to use TF combinations

view details

Fei Hu

commit sha 6ff609aa1f98e35c282a05471a95c15fa97a0918

Switch ParallelInterleaveTest to use TF combinations

view details

Fei Hu

commit sha 28248c54fe6813675954643bb3ceecd16b1be71a

Switch ParseExampleDatasetTest to use TF combinations

view details

Fei Hu

commit sha bfb314227800bf6a4e92639de14d8e6ec996ec6a

Switch PrefetchToDeviceTest to use TF combinations

view details

Fei Hu

commit sha efd4441af886b5700d96b880b6cb65fe246ef5a7

Switch PrefetchWithSlackTest to use TF combinations

view details

Fei Hu

commit sha 8faa464bdf3200feff2f262edc3fa242995735d2

Switch RebatchDatasetTest to use TF combinations

view details

push time in 21 days

push event feihugis/tensorflow

TensorFlower Gardener

commit sha b840bf5d3329b7f7eb2f52ae485a32230e10ba99

Merge pull request #36018 from ROCmSoftwarePlatform:google_upstream_rocm_update_200118_2 PiperOrigin-RevId: 291244967 Change-Id: Iec082e8b901a055ec2aff71cddc5cf131205d308

view details

Juhyun Lee

commit sha 29c1d308904f41aa92125d3bcf04b512599bd166

Make some //tf/compiler/mlir/tensorflow/utils modules build with --config android_arm64. PiperOrigin-RevId: 291246582 Change-Id: Idf13e25462a9722ea70bcc48a9c7b9091bcba8a5

view details

Raman Sarokin

commit sha 4d16c3d443cd49925abf16e170762f1d8b4fad34

Added determination of floating rounding modes. Fixed typo in Conv3D. PiperOrigin-RevId: 291247063 Change-Id: I573ac31f79fdc158366e5a5aaeb03adc6527af2f

view details

A. Unique TensorFlower

commit sha ee8d4da3218da19101045c367bf3725f9b0c6c83

Fix mul_test compilation + revert unwanted changes in test run_unit_tests.sh PiperOrigin-RevId: 291247838 Change-Id: Ibf413fd64e0e4e22e3df1feceec3cc9ea284735c

view details

Derek Murray

commit sha 1c1a90e2ae5cee23d1b2e3978676a8a4be7e8f57

[tstring] Avoid temporary std::string creation in DecodeRawOp. PiperOrigin-RevId: 291248157 Change-Id: I26ce5a69ac5f26da702627c9e52eec62b681a7e1

view details

Andy Ly

commit sha e4d255252321300750e6962cab47d175956baf69

Remove canonicalizer and extra breakup-islands passes at the end of the TPU bridge. As the end of the TPU bridge passes is preparing the IR for exporting back to Graph, having the canonicalizer run again is not be necessary as we want to preserve tf_executor.graph with single op tf_executor.islands. PiperOrigin-RevId: 291248163 Change-Id: I8ac71d190d94c1515a1c76c43b6d11020bc1b2c2

view details

Yifei Feng

commit sha 77bed6590d239c3eee2f25e6f483094cf282ecf6

Only update default docker images for official TF release versions. PiperOrigin-RevId: 291249321 Change-Id: I7676cde9b79c22702b21c9d7bfd8ca8f64f721a7

view details

Abin Shahab

commit sha 06094928234856a89bf4b607e2fbc9eca9b2d35f

JSON Serializable checks for array and structs Checks for np.ndarray and np.generic for logs.item to fix JSON Serializable issues in tenorflow.keral.RemoteMonitor class #32192

view details

Yunlu Li

commit sha 82a7f1a471c74b83c9b172ce08e0b029b8fe5e36

Make Densify op working. PiperOrigin-RevId: 291249962 Change-Id: Ib1cbb27e7069d007c115a0be20d43e96512d736f

view details

Ayush Dubey

commit sha a88cebf43cd860289a824007f5a0780f810fbdd9

Make NcclManager's error message more descriptive. This should make it easier to tracking down NCCL/CUDA errors in NcclManager. PiperOrigin-RevId: 291253749 Change-Id: I9d349509ee25cc4de5d40a657918e32199bc496b

view details

TensorFlower Gardener

commit sha 9652ec9ec328463550c1b579bd90b9aeafca4143

Merge pull request #36165 from angerson:master PiperOrigin-RevId: 291254453 Change-Id: I0de717de3c9db98cc21d4bc89985bef6a001d9c5

view details

Pete Warden

commit sha adf6e22e4af83afd55e0da3caa7e7959def1e6b6

Enable visualization script to work from pip install Current the visualization script for TensorFlow Lite files only works if you build through Bazel. To make it more accessible, this change uses Flatbuffer's new Python interface to extract the information from the file, rather than calling out to an external tool. It also adds some tests. A lot of the changes here are related to upgrading to the latest version of Flatbuffers, which has an impact on other parts of the code. PiperOrigin-RevId: 291255530 Change-Id: I28a64a182e6197a58926d02c41988e4791c60fce

view details

Shanqing Cai

commit sha 72144ee6f671d768236f41181e320110bafe978a

[tfdbg2] Temporarily disable source_utils_test on Windows PiperOrigin-RevId: 291256022 Change-Id: Ieea82957440924a0d3d827c2788e7f53f497fd24

view details

George Karpenkov

commit sha 4721fbaf2b2e99fe74292d61bfa7b0ba90e2b090

[XLA] Disable tests which do not run in OSS PiperOrigin-RevId: 291256337 Change-Id: Iba9d4a74885b6ca675f5ac3f5aeb3e5b465c72f3

view details

Skye Wanderman-Milne

commit sha aa50d2b624c7e8d56b4b1644c4ccf489d8e8c55c

[XLA:Python] Changes to appease our OSS compiler. In the compiler's defense, structured bindings are a C++17 feature, and TF is supposedly built with C++14 (not sure how these got through before). PiperOrigin-RevId: 291256419 Change-Id: I283fac8b09aaffb4766d6a368b063ca09f3dbcff

view details

Sachin Joglekar

commit sha d855adfc5a0195788bf5f92c3c7352e638aa1109

Adds support for half_pixel_centers in TFLite's resize op. PiperOrigin-RevId: 291256449 Change-Id: Ied0a676cc9afc04ee8a9557f8e3934d82d528c2a

view details

George Karpenkov

commit sha fbf0e3dd90ba4fee4bb768df31b2b2a72b147cc4

Fix OSS build Adds common_target_td_sources, which was previously not defined in the generated BUILD file. PiperOrigin-RevId: 291257367 Change-Id: I50c63f6fb64667d6df9220a035e9f6c49cc799fc

view details

George Karpenkov

commit sha 306dee4096e97520b1218d78c23b1a72941a9508

Enable OSS testing for XLA. PiperOrigin-RevId: 291258425 Change-Id: If8423622b2f44b8e46ffbcd9880735a354a719d3

view details

A. Unique TensorFlower

commit sha 4b17c1073961a70220b1d7f348bfb49ed2c5f6ce

Support Resize operation with two NearestNeighbor and Bilinear params. PiperOrigin-RevId: 291261285 Change-Id: Ia4e20f6766182af9e451a75be23d612d2807a8ab

view details

Henry Tan

commit sha 2c23f12b5a75e3471d491f88b4f813ba67d08273

Updating tpu_client.py to handle backward new parameter num_partitions PiperOrigin-RevId: 291268005 Change-Id: I9367584730e32cc5744df526e36ecd0cf216717b

view details

push time in 21 days

started tensorflow/workshops

started time in 25 days

push event IBM/MAX-Nucleus-Segmenter

dependabot[bot]

commit sha d89595d6ee79a8f8075829e3a65aa78a81d56ea4

Bump tensorflow from 1.12.2 to 1.15.2 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.12.2 to 1.15.2. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.12.2...v1.15.2) Signed-off-by: dependabot[bot] <support@github.com>

view details

Fei Hu

commit sha cb82d50a805cc698e698974c8b4465d7892cbe33

Bump `keras-applications` from 1.0.6 to 1.0.8 `keras-applications` v1.0.6 has a compatibility issue with tf 1.15.2 when importing resnet.

view details

push time in a month

PR merged IBM/MAX-Nucleus-Segmenter

Bump tensorflow from 1.12.2 to 1.15.2 dependencies

Bumps tensorflow from 1.12.2 to 1.15.2. <details> <summary>Release notes</summary>

Sourced from tensorflow's releases.

TensorFlow 1.15.2

Release 1.15.2

Bug Fixes and Other Changes

TensorFlow 1.15.0

Release 1.15.0

This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year.

Major Features and Improvements

  • As announced, tensorflow pip package will by default include GPU support (same as tensorflow-gpu now) for the platforms we currently have GPU support (Linux and Windows). It will work on machines with and without Nvidia GPUs. tensorflow-gpu will still be available, and CPU-only packages can be downloaded at tensorflow-cpu for users who are concerned about package size.
  • TensorFlow 1.15 contains a complete implementation of the 2.0 API in its compat.v2 module. It contains a copy of the 1.15 main module (without contrib) in the compat.v1 module. TensorFlow 1.15 is able to emulate 2.0 behavior using the enable_v2_behavior() function. This enables writing forward compatible code: by explicitly importing either tensorflow.compat.v1 or tensorflow.compat.v2, you can ensure that your code works without modifications against an installation of 1.15 or 2.0.
  • EagerTensor now supports numpy buffer interface for tensors.
  • Add toggles tf.enable_control_flow_v2() and tf.disable_control_flow_v2() for enabling/disabling v2 control flow.
  • Enable v2 control flow as part of tf.enable_v2_behavior() and TF2_BEHAVIOR=1.
  • AutoGraph translates Python control flow into TensorFlow expressions, allowing users to write regular Python inside tf.function-decorated functions. AutoGraph is also applied in functions used with tf.data, tf.distribute and tf.keras APIS.
  • Adds enable_tensor_equality(), which switches the behavior such that:
    • Tensors are no longer hashable.
    • Tensors can be compared with == and !=, yielding a Boolean Tensor with element-wise comparison results. This will be the default behavior in 2.0.
  • Auto Mixed-Precision graph optimizer simplifies converting models to float16 for acceleration on Volta and Turing Tensor Cores. This feature can be enabled by wrapping an optimizer class with tf.train.experimental.enable_mixed_precision_graph_rewrite().
  • Add environment variable TF_CUDNN_DETERMINISTIC. Setting to "true" or "1" forces the selection of deterministic cuDNN convolution and max-pooling algorithms. When this is enabled, the algorithm selection procedure itself is also deterministic.
  • TensorRT
    • Migrate TensorRT conversion sources from contrib to compiler directory in preparation for TF 2.0.
    • Add additional, user friendly TrtGraphConverter API for TensorRT conversion.
    • Expand support for TensorFlow operators in TensorRT conversion (e.g. Gather, Slice, Pack, Unpack, ArgMin, ArgMax,DepthSpaceShuffle).
    • Support TensorFlow operator CombinedNonMaxSuppression in TensorRT conversion which significantly accelerates object detection models.

Breaking Changes

  • Tensorflow code now produces 2 different pip packages: tensorflow_core containing all the code (in the future it will contain only the private implementation) and tensorflow which is a virtual pip package doing forwarding to tensorflow_core (and in the future will contain only the public API of tensorflow). We don't expect this to be breaking, unless you were importing directly from the implementation.
  • TensorFlow 1.15 is built using devtoolset7 (GCC7) on Ubuntu 16. This may lead to ABI incompatibilities with extensions built against earlier versions of TensorFlow.
  • Deprecated the use of constraint= and .constraint with ResourceVariable.
  • tf.keras:
    • OMP_NUM_THREADS is no longer used by the default Keras config. To configure the number of threads, use tf.config.threading APIs.
    • tf.keras.model.save_model and model.save now defaults to saving a TensorFlow SavedModel.
    • keras.backend.resize_images (and consequently, keras.layers.Upsampling2D) behavior has changed, a bug in the resizing implementation was fixed.
    • Layers now default to float32, and automatically cast their inputs to the layer's dtype. If you had a model that used float64, it will probably silently use float32 in TensorFlow2, and a warning will be issued that starts with Layer "layer-name" is casting an input tensor from dtype float64 to the layer's dtype of float32. To fix, either set the default dtype to float64 with tf.keras.backend.set_floatx('float64'), or pass dtype='float64' to each of the Layer constructors. See tf.keras.layers.Layer for more information.
    • Some tf.assert_* methods now raise assertions at operation creation time (i.e. when this Python line executes) if the input tensors' values are known at that time, not during the session.run(). When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in feed_dict argument to session.run(), an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often).

Bug Fixes and Other Changes

  • tf.estimator:
    • tf.keras.estimator.model_to_estimator now supports exporting to tf.train.Checkpoint format, which allows the saved checkpoints to be compatible with model.load_weights.
    • Fix tests in canned estimators.
    • Expose Head as public API.
    • Fixes critical bugs that help with DenseFeatures usability in TF2 </tr></table> ... (truncated) </details> <details> <summary>Changelog</summary>

Sourced from tensorflow's changelog.

Release 1.15.2

Bug Fixes and Other Changes

Release 2.1.0

TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support officially ends on January 1, 2020. As announced earlier, TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019.

Major Features and Improvements

  • The tensorflow pip package now includes GPU support by default (same as tensorflow-gpu) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. tensorflow-gpu is still available, and CPU-only packages can be downloaded at tensorflow-cpu for users who are concerned about package size.
  • Windows users: Officially-released tensorflow Pip packages are now built with Visual Studio 2019 version 16.4 in order to take advantage of the new /d2ReducedOptimizeHugeFunctions compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website here.
    • This does not change the minimum required version for building TensorFlow from source on Windows, but builds enabling EIGEN_STRONG_INLINE can take over 48 hours to compile without this flag. Refer to configure.py for more information about EIGEN_STRONG_INLINE and /d2ReducedOptimizeHugeFunctions.
    • If either of the required DLLs, msvcp140.dll (old) or msvcp140_1.dll (new), are missing on your machine, import tensorflow will print a warning message.
  • The tensorflow pip package is built with CUDA 10.1 and cuDNN 7.6.
  • tf.keras
    • Experimental support for mixed precision is available on GPUs and Cloud TPUs. See usage guide.
    • Introduced the TextVectorization layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this end-to-end text classification example.
    • Keras .compile .fit .evaluate and .predict are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope.
    • Experimental support for Keras .compile, .fit, .evaluate, and .predict is available for Cloud TPUs, Cloud TPU, for all types of Keras models (sequential, functional and subclassing models).
    • Automatic outside compilation is now enabled for Cloud TPUs. This allows tf.summary to be used more conveniently with Cloud TPUs.
    • Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs.
    • Support for .fit, .evaluate, .predict on TPU using numpy data, in addition to tf.data.Dataset.
    • Keras reference implementations for many popular models are available in the TensorFlow Model Garden.
  • tf.data
    • Changes rebatching for tf.data datasets + DistributionStrategy for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas.
    • tf.data.Dataset now supports automatic data distribution and sharding in distributed environments, including on TPU pods.
    • Distribution policies for tf.data.Dataset can now be tuned with 1. tf.data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA) 2. tf.data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)
  • tf.debugging
    • Add tf.debugging.enable_check_numerics() and tf.debugging.disable_check_numerics() to help debugging the root causes of issues involving infinities and NaNs.
  • tf.distribute
    • Custom training loop support on TPUs and TPU pods is available through strategy.experimental_distribute_dataset, strategy.experimental_distribute_datasets_from_function, strategy.experimental_run_v2, strategy.reduce.
    • Support for a global distribution strategy through tf.distribute.experimental_set_strategy(), in addition to strategy.scope().
  • TensorRT
    • TensorRT 6.0 is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as tf.experimental.tensorrt.Converter.
  • Environment variable TF_DETERMINISTIC_OPS has been added. When set to "true" or "1", this environment variable makes tf.nn.bias_add operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is not enabled. Setting TF_DETERMINISTIC_OPS to "true" or "1" also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU.

Breaking Changes

  • Deletes Operation.traceback_with_start_lines for which we know of no usages.
  • Removed id from tf.Tensor.__repr__() as id is not useful other than internal debugging.
  • Some tf.assert_* methods now raise assertions at operation creation time if the input tensors' values are known at that time, not during the session.run(). This only changes behavior when the graph execution would have resulted in an error. When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in feed_dict argument to session.run(), an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often).
  • The following APIs are no longer experimental: tf.config.list_logical_devices, tf.config.list_physical_devices, tf.config.get_visible_devices, tf.config.set_visible_devices, tf.config.get_logical_device_configuration, tf.config.set_logical_device_configuration.
  • tf.config.experimental.VirtualDeviceConfiguration has been renamed to tf.config.LogicalDeviceConfiguration.
  • tf.config.experimental_list_devices has been removed, please use tf.config.list_logical_devices.

Bug Fixes and Other Changes

</tr></table> ... (truncated) </details> <details> <summary>Commits</summary>

  • 5d80e1e Merge pull request #36215 from tensorflow-jenkins/version-numbers-1.15.2-8214
  • 71e9d8f Update version numbers to 1.15.2
  • e50120e Merge pull request #36214 from tensorflow-jenkins/relnotes-1.15.2-2203
  • 1a7e9fb Releasing 1.15.2 instead of 1.15.1
  • 85f7aab Insert release notes place-fill
  • e75a6d6 Merge pull request #36190 from tensorflow/mm-r1.15-fix-v2-build
  • a6d8973 Use config=v1 as this is r1.15 branch.
  • fdb8589 Merge pull request #35912 from tensorflow-jenkins/relnotes-1.15.1-31298
  • a6051e8 Add CVE number for main patch
  • 360b2e3 Merge pull request #34532 from ROCmSoftwarePlatform/r1.15-rccl-upstream-patch
  • Additional commits viewable in compare view </details> <br />

Dependabot compatibility score

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting @dependabot rebase.


<details> <summary>Dependabot commands and options</summary> <br />

You can trigger Dependabot actions by commenting on this PR:

  • @dependabot rebase will rebase this PR
  • @dependabot recreate will recreate this PR, overwriting any edits that have been made to it
  • @dependabot merge will merge this PR after your CI passes on it
  • @dependabot squash and merge will squash and merge this PR after your CI passes on it
  • @dependabot cancel merge will cancel a previously requested merge and block automerging
  • @dependabot reopen will reopen this PR if it is closed
  • @dependabot ignore this [patch|minor|major] version will close this PR and stop Dependabot creating any more for this minor/major version (unless you reopen the PR or upgrade to it yourself)
  • @dependabot ignore this dependency will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
  • @dependabot use these labels will set the current labels as the default for future PRs for this repo and language
  • @dependabot use these reviewers will set the current reviewers as the default for future PRs for this repo and language
  • @dependabot use these assignees will set the current assignees as the default for future PRs for this repo and language
  • @dependabot use this milestone will set the current milestone as the default for future PRs for this repo and language

You can disable automated security fix PRs for this repo from the Security Alerts page.

</details>

+2 -2

0 comment

1 changed file

dependabot[bot]

pr closed time in a month

push eventIBM/MAX-Nucleus-Segmenter

Fei Hu

commit sha 47741809cda28df8b0a4173e3151e551d8001272

Bump `keras-applications` from 1.0.6 to 1.0.8 `keras-applications` v1.0.6 has a compatibility issue with tf 1.15.2 when importing resnet.

view details

push time in a month

Pull request review commentIBM/MAX-Nucleus-Segmenter

Bump tensorflow from 1.12.2 to 1.15.2

 Pillow==5.1.0 cython==0.28.2 matplotlib==2.2.2 scikit-image==0.15.0-tensorflow==1.12.2+tensorflow==1.15.2 keras==2.1.6 keras-applications==1.0.6
keras-applications==1.0.8
dependabot[bot]

comment created time in a month

push eventIBM/MAX-Nucleus-Segmenter

dependabot[bot]

commit sha ec224f15437f178bcbb229d71e9506b5f944ac95

Bump pillow from 5.1.0 to 6.2.0 Bumps [pillow](https://github.com/python-pillow/Pillow) from 5.1.0 to 6.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/5.1.0...6.2.0) Signed-off-by: dependabot[bot] <support@github.com>

view details

push time in a month

PR merged IBM/MAX-Nucleus-Segmenter

Bump pillow from 5.1.0 to 6.2.0 dependencies

Bumps pillow from 5.1.0 to 6.2.0. <details> <summary>Release notes</summary>

Sourced from pillow's releases.

6.2.0

https://pillow.readthedocs.io/en/stable/releasenotes/6.2.0.html

6.1.0

https://pillow.readthedocs.io/en/stable/releasenotes/6.1.0.html

6.0.0

No release notes provided.

5.4.1

No release notes provided.

5.4.0

No release notes provided.

5.3.0

No release notes provided.

5.2.0

No release notes provided. </details> <details> <summary>Changelog</summary>

Sourced from pillow's changelog.

6.2.0 (2019-10-01)

  • Catch buffer overruns #4104 [radarhere]

  • Initialize rows_per_strip when RowsPerStrip tag is missing #4034 [cgohlke, radarhere]

  • Raise error if TIFF dimension is a string #4103 [radarhere]

  • Added decompression bomb checks #4102 [radarhere]

  • Fix ImageGrab.grab DPI scaling on Windows 10 version 1607+ #4000 [nulano, radarhere]

  • Corrected negative seeks #4101 [radarhere]

  • Added argument to capture all screens on Windows #3950 [nulano, radarhere]

  • Updated warning to specify when Image.frombuffer defaults will change #4086 [radarhere]

  • Changed WindowsViewer format to PNG #4080 [radarhere]

  • Use TIFF orientation #4063 [radarhere]

  • Raise the same error if a truncated image is loaded a second time #3965 [radarhere]

  • Lazily use ImageFileDirectory_v1 values from Exif #4031 [radarhere]

  • Improved HSV conversion #4004 [radarhere]

  • Added text stroking #3978 [radarhere, hugovk]

  • No more deprecated bdist_wininst .exe installers #4029 [hugovk]

  • Do not allow floodfill to extend into negative coordinates #4017 [radarhere] </tr></table> ... (truncated) </details> <details> <summary>Commits</summary>

Dependabot compatibility score

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting @dependabot rebase.


<details> <summary>Dependabot commands and options</summary> <br />

You can trigger Dependabot actions by commenting on this PR:

  • @dependabot rebase will rebase this PR
  • @dependabot recreate will recreate this PR, overwriting any edits that have been made to it
  • @dependabot merge will merge this PR after your CI passes on it
  • @dependabot squash and merge will squash and merge this PR after your CI passes on it
  • @dependabot cancel merge will cancel a previously requested merge and block automerging
  • @dependabot reopen will reopen this PR if it is closed
  • @dependabot ignore this [patch|minor|major] version will close this PR and stop Dependabot creating any more for this minor/major version (unless you reopen the PR or upgrade to it yourself)
  • @dependabot ignore this dependency will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
  • @dependabot use these labels will set the current labels as the default for future PRs for this repo and language
  • @dependabot use these reviewers will set the current reviewers as the default for future PRs for this repo and language
  • @dependabot use these assignees will set the current assignees as the default for future PRs for this repo and language
  • @dependabot use this milestone will set the current milestone as the default for future PRs for this repo and language

You can disable automated security fix PRs for this repo from the Security Alerts page.

</details>

+1 -1

0 comment

1 changed file

dependabot[bot]

pr closed time in a month

push eventIBM/MAX-Human-Pose-Estimator

dependabot[bot]

commit sha 5656376aaaacdf0b8c829f3d7e94933ef19c7631

Bump tensorflow from 1.12.2 to 1.15.2 (#35) Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.12.2 to 1.15.2. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.12.2...v1.15.2) Signed-off-by: dependabot[bot] <support@github.com>

view details

push time in a month

PR merged IBM/MAX-Human-Pose-Estimator

Bump tensorflow from 1.12.2 to 1.15.2 dependencies

Bumps tensorflow from 1.12.2 to 1.15.2. <details> <summary>Release notes</summary>

Sourced from tensorflow's releases.

TensorFlow 1.15.2

Release 1.15.2

Bug Fixes and Other Changes

TensorFlow 1.15.0

Release 1.15.0

This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year.

Major Features and Improvements

  • As announced, tensorflow pip package will by default include GPU support (same as tensorflow-gpu now) for the platforms we currently have GPU support (Linux and Windows). It will work on machines with and without Nvidia GPUs. tensorflow-gpu will still be available, and CPU-only packages can be downloaded at tensorflow-cpu for users who are concerned about package size.
  • TensorFlow 1.15 contains a complete implementation of the 2.0 API in its compat.v2 module. It contains a copy of the 1.15 main module (without contrib) in the compat.v1 module. TensorFlow 1.15 is able to emulate 2.0 behavior using the enable_v2_behavior() function. This enables writing forward compatible code: by explicitly importing either tensorflow.compat.v1 or tensorflow.compat.v2, you can ensure that your code works without modifications against an installation of 1.15 or 2.0.
  • EagerTensor now supports numpy buffer interface for tensors.
  • Add toggles tf.enable_control_flow_v2() and tf.disable_control_flow_v2() for enabling/disabling v2 control flow.
  • Enable v2 control flow as part of tf.enable_v2_behavior() and TF2_BEHAVIOR=1.
  • AutoGraph translates Python control flow into TensorFlow expressions, allowing users to write regular Python inside tf.function-decorated functions. AutoGraph is also applied in functions used with tf.data, tf.distribute and tf.keras APIs.
  • Adds enable_tensor_equality(), which switches the behavior such that:
    • Tensors are no longer hashable.
    • Tensors can be compared with == and !=, yielding a Boolean Tensor with element-wise comparison results. This will be the default behavior in 2.0.
  • Auto Mixed-Precision graph optimizer simplifies converting models to float16 for acceleration on Volta and Turing Tensor Cores. This feature can be enabled by wrapping an optimizer class with tf.train.experimental.enable_mixed_precision_graph_rewrite().
  • Add environment variable TF_CUDNN_DETERMINISTIC. Setting to "true" or "1" forces the selection of deterministic cuDNN convolution and max-pooling algorithms. When this is enabled, the algorithm selection procedure itself is also deterministic.
  • TensorRT
    • Migrate TensorRT conversion sources from contrib to compiler directory in preparation for TF 2.0.
    • Add additional, user friendly TrtGraphConverter API for TensorRT conversion.
    • Expand support for TensorFlow operators in TensorRT conversion (e.g. Gather, Slice, Pack, Unpack, ArgMin, ArgMax, DepthSpaceShuffle).
    • Support TensorFlow operator CombinedNonMaxSuppression in TensorRT conversion which significantly accelerates object detection models.

Breaking Changes

  • Tensorflow code now produces 2 different pip packages: tensorflow_core containing all the code (in the future it will contain only the private implementation) and tensorflow which is a virtual pip package doing forwarding to tensorflow_core (and in the future will contain only the public API of tensorflow). We don't expect this to be breaking, unless you were importing directly from the implementation.
  • TensorFlow 1.15 is built using devtoolset7 (GCC7) on Ubuntu 16. This may lead to ABI incompatibilities with extensions built against earlier versions of TensorFlow.
  • Deprecated the use of constraint= and .constraint with ResourceVariable.
  • tf.keras:
    • OMP_NUM_THREADS is no longer used by the default Keras config. To configure the number of threads, use tf.config.threading APIs.
    • tf.keras.model.save_model and model.save now defaults to saving a TensorFlow SavedModel.
    • keras.backend.resize_images (and consequently, keras.layers.Upsampling2D) behavior has changed, a bug in the resizing implementation was fixed.
    • Layers now default to float32, and automatically cast their inputs to the layer's dtype. If you had a model that used float64, it will probably silently use float32 in TensorFlow2, and a warning will be issued that starts with Layer "layer-name" is casting an input tensor from dtype float64 to the layer's dtype of float32. To fix, either set the default dtype to float64 with tf.keras.backend.set_floatx('float64'), or pass dtype='float64' to each of the Layer constructors. See tf.keras.layers.Layer for more information.
    • Some tf.assert_* methods now raise assertions at operation creation time (i.e. when this Python line executes) if the input tensors' values are known at that time, not during the session.run(). When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in feed_dict argument to session.run(), an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often).

Bug Fixes and Other Changes

  • tf.estimator:
    • tf.keras.estimator.model_to_estimator now supports exporting to tf.train.Checkpoint format, which allows the saved checkpoints to be compatible with model.load_weights.
    • Fix tests in canned estimators.
    • Expose Head as public API.
    • Fixes critical bugs that help with DenseFeatures usability in TF2 </tr></table> ... (truncated) </details> <details> <summary>Changelog</summary>

Sourced from tensorflow's changelog.

Release 1.15.2

Bug Fixes and Other Changes

Release 2.1.0

TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support officially ends on January 1, 2020. As announced earlier, TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019.

Major Features and Improvements

  • The tensorflow pip package now includes GPU support by default (same as tensorflow-gpu) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. tensorflow-gpu is still available, and CPU-only packages can be downloaded at tensorflow-cpu for users who are concerned about package size.
  • Windows users: Officially-released tensorflow Pip packages are now built with Visual Studio 2019 version 16.4 in order to take advantage of the new /d2ReducedOptimizeHugeFunctions compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website here.
    • This does not change the minimum required version for building TensorFlow from source on Windows, but builds enabling EIGEN_STRONG_INLINE can take over 48 hours to compile without this flag. Refer to configure.py for more information about EIGEN_STRONG_INLINE and /d2ReducedOptimizeHugeFunctions.
    • If either of the required DLLs, msvcp140.dll (old) or msvcp140_1.dll (new), are missing on your machine, import tensorflow will print a warning message.
  • The tensorflow pip package is built with CUDA 10.1 and cuDNN 7.6.
  • tf.keras
    • Experimental support for mixed precision is available on GPUs and Cloud TPUs. See usage guide.
    • Introduced the TextVectorization layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this end-to-end text classification example.
    • Keras .compile .fit .evaluate and .predict are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope.
    • Experimental support for Keras .compile, .fit, .evaluate, and .predict is available for Cloud TPUs, Cloud TPU, for all types of Keras models (sequential, functional and subclassing models).
    • Automatic outside compilation is now enabled for Cloud TPUs. This allows tf.summary to be used more conveniently with Cloud TPUs.
    • Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs.
    • Support for .fit, .evaluate, .predict on TPU using numpy data, in addition to tf.data.Dataset.
    • Keras reference implementations for many popular models are available in the TensorFlow Model Garden.
  • tf.data
    • Changes rebatching for tf.data datasets + DistributionStrategy for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas.
    • tf.data.Dataset now supports automatic data distribution and sharding in distributed environments, including on TPU pods.
    • Distribution policies for tf.data.Dataset can now be tuned with 1. tf.data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA) 2. tf.data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)
  • tf.debugging
    • Add tf.debugging.enable_check_numerics() and tf.debugging.disable_check_numerics() to help debugging the root causes of issues involving infinities and NaNs.
  • tf.distribute
    • Custom training loop support on TPUs and TPU pods is available through strategy.experimental_distribute_dataset, strategy.experimental_distribute_datasets_from_function, strategy.experimental_run_v2, strategy.reduce.
    • Support for a global distribution strategy through tf.distribute.experimental_set_strategy(), in addition to strategy.scope().
  • TensorRT
    • TensorRT 6.0 is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as tf.experimental.tensorrt.Converter.
  • Environment variable TF_DETERMINISTIC_OPS has been added. When set to "true" or "1", this environment variable makes tf.nn.bias_add operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is not enabled. Setting TF_DETERMINISTIC_OPS to "true" or "1" also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU.

Breaking Changes

  • Deletes Operation.traceback_with_start_lines for which we know of no usages.
  • Removed id from tf.Tensor.__repr__() as id is not useful other than internal debugging.
  • Some tf.assert_* methods now raise assertions at operation creation time if the input tensors' values are known at that time, not during the session.run(). This only changes behavior when the graph execution would have resulted in an error. When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in feed_dict argument to session.run(), an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often).
  • The following APIs are no longer experimental: tf.config.list_logical_devices, tf.config.list_physical_devices, tf.config.get_visible_devices, tf.config.set_visible_devices, tf.config.get_logical_device_configuration, tf.config.set_logical_device_configuration.
  • tf.config.experimental.VirtualDeviceConfiguration has been renamed to tf.config.LogicalDeviceConfiguration.
  • tf.config.experimental_list_devices has been removed, please use tf.config.list_logical_devices.

Bug Fixes and Other Changes

</tr></table> ... (truncated) </details> <details> <summary>Commits</summary>

  • 5d80e1e Merge pull request #36215 from tensorflow-jenkins/version-numbers-1.15.2-8214
  • 71e9d8f Update version numbers to 1.15.2
  • e50120e Merge pull request #36214 from tensorflow-jenkins/relnotes-1.15.2-2203
  • 1a7e9fb Releasing 1.15.2 instead of 1.15.1
  • 85f7aab Insert release notes place-fill
  • e75a6d6 Merge pull request #36190 from tensorflow/mm-r1.15-fix-v2-build
  • a6d8973 Use config=v1 as this is r1.15 branch.
  • fdb8589 Merge pull request #35912 from tensorflow-jenkins/relnotes-1.15.1-31298
  • a6051e8 Add CVE number for main patch
  • 360b2e3 Merge pull request #34532 from ROCmSoftwarePlatform/r1.15-rccl-upstream-patch
  • Additional commits viewable in compare view </details> <br />

Dependabot compatibility score

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting @dependabot rebase.


<details> <summary>Dependabot commands and options</summary> <br />

You can trigger Dependabot actions by commenting on this PR:

  • @dependabot rebase will rebase this PR
  • @dependabot recreate will recreate this PR, overwriting any edits that have been made to it
  • @dependabot merge will merge this PR after your CI passes on it
  • @dependabot squash and merge will squash and merge this PR after your CI passes on it
  • @dependabot cancel merge will cancel a previously requested merge and block automerging
  • @dependabot reopen will reopen this PR if it is closed
  • @dependabot ignore this [patch|minor|major] version will close this PR and stop Dependabot creating any more for this minor/major version (unless you reopen the PR or upgrade to it yourself)
  • @dependabot ignore this dependency will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
  • @dependabot use these labels will set the current labels as the default for future PRs for this repo and language
  • @dependabot use these reviewers will set the current reviewers as the default for future PRs for this repo and language
  • @dependabot use these assignees will set the current assignees as the default for future PRs for this repo and language
  • @dependabot use this milestone will set the current milestone as the default for future PRs for this repo and language

You can disable automated security fix PRs for this repo from the Security Alerts page.

</details>

+1 -1

0 comment

1 changed file

dependabot[bot]

pr closed time in a month

startedclang-ykt/ONNF

started time in a month

startedgoogle/trax

started time in a month

issue commenttensorflow/tensorflow

Linking error when building Tensorflow 2.1

Hi @feihugis, thanks for the workaround. Any clue why it doesn't get solved by adding "--linkopt=-lstdc++"? I tried that, but seems like failing. I will try your suggestion.

I am not very clear about this. @wdirons Do you have any ideas?

DnPlas

comment created time in a month

issue commenttensorflow/tensorflow

Linking error when building Tensorflow 2.1

The solution from @wdirons works for me : BAZEL_LINKLIBS=-l%:libstdc++.a bazel build -c opt //tensorflow/tools/pip_package:build_pip_package

DnPlas

comment created time in a month

startedsuperzilong/neo-blog

started time in a month

startedsuperzilong/simple-blog-server

started time in a month

fork feihugis/TEASER-plusplus

A fast and robust point-cloud registration library

fork in a month

startedMIT-SPARK/TEASER-plusplus

started time in a month

pull request commentIBM/MAX-Object-Detector

Use CDN link in the readme

Tried the link. It worked for me. But there is a failure in CI.

bdwyer2

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If

Yeah, the section added is helpful.

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)

Gotcha!

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:

Thanks for the link!

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.

The distributed dataset will be very useful for inference as well!

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If+      the iteration was created with `n` consumers, `consumers_index` must be+      less than `n`.++  Returns:+    A Python iterator which iterates over the dataset elements.+  """+```++### Dataset splitting API++To parallelize dataset processing, the tf.data service needs a way to split up+datasets. We will achieve this by adding a splitting API that allows source+datasets to express how they can be split.++Our goals for the API are++*   Performance: The splitting API can be used to performantly split and process+    datasets.+*   Extensibility: User-defined datasets can be split as long as they implement+    the splitting API.+*   Minimize Surprises: Users write their datasets as though they will not be+    split, so introducing splitting can easily lead to unexpected outcomes. 
To+    mitigate this, we will be conservative about which dataset transformations+    support splitting.++The API will be used internally by the tf.data service to distribute datasets.+It will be entirely in C++, and we don't currently have any plans to expose+splitting through Python.++The API focuses on producing and consuming `Split`s. A `Split` is a variant+Tensor that can be subclassed to represent arbitrary types of splitting.++```cpp+class Split {+ public:+  virtual std::string DebugString() const = 0;+  // Methods to support being used as a Variant tensor.+  virtual std::string TypeName() const = 0;+  virtual void Encode(VariantTensorData* data) const = 0;+  virtual bool Decode(const VariantTensorData& data) = 0;+};+```++To iterate over splits for a dataset, we will use a new+`DatasetBase::MakeSplitGenerator()` method. This method creates a+`SplitGenerator`, which is responsible for generating all of the splits for the+dataset. We use an intermediate `SplitGenerator` object instead of generating+splits directly because there could be a large number of splits, and the+`SplitGenerator` gives us as way to tune split size in response to pipeline+performance.++```cpp+class SplitGenerator {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+  // Instructs the SplitGenerator to adjust the size of future splits by the+  // specified percent. 100% means no change, 50% means half-sized splits, and+  // 200% means double-sized splits. The SplitGenerator will make a best effort+  // to incorporate the feedback when creating splits.+  virtual void AdjustSplitSize(int percent) = 0;+};+```++It is tempting to process each split independently, but this would cause issues+when splits are small. tf.data pipelines need to populate internal buffers for+shuffling, prefetching, and batching. 
If we use a separate pipeline to process+each split, our shuffling will be lower quality, we will have performance jitter+as we keep needing to refill prefetch buffers from scratching, and we will+produce many more partial batches (each split might not even have enough data to+fill a full batch). To avoid these issues, we use a small number of tasks, where+each task processes many splits as a single pipeline.++To enable processing of multiple splits in a dataset, we will add an optional+`SplitProvider` field to the `IteratorContext` passed to+`IteratorBase::Initialize`. The `SplitProvider` produces splits which tell the+iterator what source data to iterate over. For example, if splits are+represented by filenames, and a SplitProvider produces `["file1", "file6",+"file11"]`, an iterator initialized by that `SplitProvider` should process those+three files only.++```cpp+class SplitProvider {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+};+```++When processing datasets, tf.data service workers will use `SplitProvider`s+which provide splits by querying the tf.data service master for which splits to+process. A few splits will be prefetched to hide the latency of needing to+request a new split from the master.++#### Supported Datasets++Not all dataset sources and transformations are easily splittable. For example,+`take`, `skip`, and `scan` require a global view of the dataset to produce+correct results. Datasets which require multiple input datasets such as `zip`+are also difficult to support, since we don't have a good way of aligning the+splits of multiple input datasets. 
Users who rely on these unsupported datasets+will need to move those datasets to come after the distributed part of their+pipeline.++Initially, we will support splitting for the following dataset sources and+transformations:++*   `batch`, `CsvDataset`, `dense_to_sparse_batch`, `filter`,+    `FixedLengthRecordDataset`, `flat_map`, `from_tensor_slices`,+    `group_by_window`, `ignore_errors`, `interleave`, `list_files`, `map`,+    `range`, `repeat`, `padded_batch`, `prefetch`, `shuffle`, `SSTableDataset`,+    `TextLineDataset`, `TFRecordDataset`, `unbatch`, `window`.++### Master and worker services++This section discusses the design for the master and worker services. These+services are used by the Python API to provide distributed dataset processing,+and these services use the splitting API as a part of their implementation.++#### Master API++The master is responsible for registering datasets, generating and tracking+iteration and worker ids, and generating dataset splits for processing on+workers.++Below is a sketch of the Master API. This API is not public and is subject to+change.++```cpp+/// ---- Methods called by consumers ----++// Registers a dataset and returns an id for the dataset. If the dataset is+// already registered, its dataset id is returned.+int GetOrRegisterDataset(GraphDef dataset);++// Creates and returns `num_consumers` iterator ids which partition the+// specified dataset. This also creates an internal `iteration_id` used to+// track the overall dataset iteration. `num_tasks` defines how many tasks to+// create. If `num_tasks` is -1, it is up to the master to determine how many+// tasks to create.+list<int> CreateIterators(int dataset_id, int num_consumers,+                          int num_tasks);++// Returns the list of tasks processing data for `iterator_id`. Consumers query+// this to find which worker addresses to read data from.+list<TaskInfo> GetWorkersForiterator(int iterator_id);

Could you please give more details about how to decide which workers the consumer should read data from? Could we enable the end-users to customize the strategy?

nit: GetWorkersForiterator -> GetWorkersForIterator.

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If+      the iteration was created with `n` consumers, `consumers_index` must be+      less than `n`.++  Returns:+    A Python iterator which iterates over the dataset elements.+  """+```++### Dataset splitting API++To parallelize dataset processing, the tf.data service needs a way to split up+datasets. We will achieve this by adding a splitting API that allows source+datasets to express how they can be split.++Our goals for the API are++*   Performance: The splitting API can be used to performantly split and process+    datasets.+*   Extensibility: User-defined datasets can be split as long as they implement+    the splitting API.+*   Minimize Surprises: Users write their datasets as though they will not be+    split, so introducing splitting can easily lead to unexpected outcomes. 
To+    mitigate this, we will be conservative about which dataset transformations+    support splitting.++The API will be used internally by the tf.data service to distribute datasets.+It will be entirely in C++, and we don't currently have any plans to expose+splitting through Python.++The API focuses on producing and consuming `Split`s. A `Split` is a variant+Tensor that can be subclassed to represent arbitrary types of splitting.++```cpp+class Split {+ public:+  virtual std::string DebugString() const = 0;+  // Methods to support being used as a Variant tensor.+  virtual std::string TypeName() const = 0;+  virtual void Encode(VariantTensorData* data) const = 0;+  virtual bool Decode(const VariantTensorData& data) = 0;+};+```++To iterate over splits for a dataset, we will use a new+`DatasetBase::MakeSplitGenerator()` method. This method creates a+`SplitGenerator`, which is responsible for generating all of the splits for the+dataset. We use an intermediate `SplitGenerator` object instead of generating+splits directly because there could be a large number of splits, and the+`SplitGenerator` gives us as way to tune split size in response to pipeline+performance.++```cpp+class SplitGenerator {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+  // Instructs the SplitGenerator to adjust the size of future splits by the+  // specified percent. 100% means no change, 50% means half-sized splits, and+  // 200% means double-sized splits. The SplitGenerator will make a best effort+  // to incorporate the feedback when creating splits.+  virtual void AdjustSplitSize(int percent) = 0;+};+```++It is tempting to process each split independently, but this would cause issues+when splits are small. tf.data pipelines need to populate internal buffers for+shuffling, prefetching, and batching. 
If we use a separate pipeline to process+each split, our shuffling will be lower quality, we will have performance jitter+as we keep needing to refill prefetch buffers from scratching, and we will+produce many more partial batches (each split might not even have enough data to+fill a full batch). To avoid these issues, we use a small number of tasks, where+each task processes many splits as a single pipeline.++To enable processing of multiple splits in a dataset, we will add an optional+`SplitProvider` field to the `IteratorContext` passed to+`IteratorBase::Initialize`. The `SplitProvider` produces splits which tell the+iterator what source data to iterate over. For example, if splits are+represented by filenames, and a SplitProvider produces `["file1", "file6",+"file11"]`, an iterator initialized by that `SplitProvider` should process those+three files only.++```cpp+class SplitProvider {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+};+```++When processing datasets, tf.data service workers will use `SplitProvider`s+which provide splits by querying the tf.data service master for which splits to+process. A few splits will be prefetched to hide the latency of needing to+request a new split from the master.++#### Supported Datasets++Not all dataset sources and transformations are easily splittable. For example,+`take`, `skip`, and `scan` require a global view of the dataset to produce+correct results. Datasets which require multiple input datasets such as `zip`+are also difficult to support, since we don't have a good way of aligning the+splits of multiple input datasets. 
Users who rely on these unsupported datasets+will need to move those datasets to come after the distributed part of their+pipeline.++Initially, we will support splitting for the following dataset sources and+transformations:++*   `batch`, `CsvDataset`, `dense_to_sparse_batch`, `filter`,+    `FixedLengthRecordDataset`, `flat_map`, `from_tensor_slices`,+    `group_by_window`, `ignore_errors`, `interleave`, `list_files`, `map`,+    `range`, `repeat`, `padded_batch`, `prefetch`, `shuffle`, `SSTableDataset`,+    `TextLineDataset`, `TFRecordDataset`, `unbatch`, `window`.++### Master and worker services++This section discusses the design for the master and worker services. These+services are used by the Python API to provide distributed dataset processing,+and these services use the splitting API as a part of their implementation.++#### Master API++The master is responsible for registering datasets, generating and tracking+iteration and worker ids, and generating dataset splits for processing on+workers.++Below is a sketch of the Master API. This API is not public and is subject to+change.++```cpp+/// ---- Methods called by consumers ----++// Registers a dataset and returns an id for the dataset. If the dataset is+// already registered, its dataset id is returned.+int GetOrRegisterDataset(GraphDef dataset);++// Creates and returns `num_consumers` iterator ids which partition the+// specified dataset. This also creates an internal `iteration_id` used to+// track the overall dataset iteration. `num_tasks` defines how many tasks to+// create. If `num_tasks` is -1, it is up to the master to determine how many+// tasks to create.+list<int> CreateIterators(int dataset_id, int num_consumers,+                          int num_tasks);

Would it be better to define num_tasks as the number of tasks per worker (defaulting to 1)? This may be easier to set. Otherwise, users need to count how many workers are in the cluster.

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If+      the iteration was created with `n` consumers, `consumers_index` must be+      less than `n`.++  Returns:+    A Python iterator which iterates over the dataset elements.+  """+```++### Dataset splitting API++To parallelize dataset processing, the tf.data service needs a way to split up+datasets. We will achieve this by adding a splitting API that allows source+datasets to express how they can be split.++Our goals for the API are++*   Performance: The splitting API can be used to performantly split and process+    datasets.+*   Extensibility: User-defined datasets can be split as long as they implement+    the splitting API.+*   Minimize Surprises: Users write their datasets as though they will not be+    split, so introducing splitting can easily lead to unexpected outcomes. 
To+    mitigate this, we will be conservative about which dataset transformations+    support splitting.++The API will be used internally by the tf.data service to distribute datasets.+It will be entirely in C++, and we don't currently have any plans to expose+splitting through Python.++The API focuses on producing and consuming `Split`s. A `Split` is a variant+Tensor that can be subclassed to represent arbitrary types of splitting.++```cpp+class Split {+ public:+  virtual std::string DebugString() const = 0;+  // Methods to support being used as a Variant tensor.+  virtual std::string TypeName() const = 0;+  virtual void Encode(VariantTensorData* data) const = 0;+  virtual bool Decode(const VariantTensorData& data) = 0;+};+```++To iterate over splits for a dataset, we will use a new+`DatasetBase::MakeSplitGenerator()` method. This method creates a+`SplitGenerator`, which is responsible for generating all of the splits for the+dataset. We use an intermediate `SplitGenerator` object instead of generating+splits directly because there could be a large number of splits, and the+`SplitGenerator` gives us as way to tune split size in response to pipeline+performance.++```cpp+class SplitGenerator {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+  // Instructs the SplitGenerator to adjust the size of future splits by the+  // specified percent. 100% means no change, 50% means half-sized splits, and+  // 200% means double-sized splits. The SplitGenerator will make a best effort+  // to incorporate the feedback when creating splits.+  virtual void AdjustSplitSize(int percent) = 0;+};+```++It is tempting to process each split independently, but this would cause issues+when splits are small. tf.data pipelines need to populate internal buffers for+shuffling, prefetching, and batching. 
If we use a separate pipeline to process+each split, our shuffling will be lower quality, we will have performance jitter+as we keep needing to refill prefetch buffers from scratching, and we will+produce many more partial batches (each split might not even have enough data to+fill a full batch). To avoid these issues, we use a small number of tasks, where+each task processes many splits as a single pipeline.++To enable processing of multiple splits in a dataset, we will add an optional+`SplitProvider` field to the `IteratorContext` passed to+`IteratorBase::Initialize`. The `SplitProvider` produces splits which tell the+iterator what source data to iterate over. For example, if splits are+represented by filenames, and a SplitProvider produces `["file1", "file6",+"file11"]`, an iterator initialized by that `SplitProvider` should process those+three files only.++```cpp+class SplitProvider {+ public:+  virtual Status GetNext(std::unique_ptr<Split>* split,+                         bool* end_of_splits) = 0;+};+```++When processing datasets, tf.data service workers will use `SplitProvider`s+which provide splits by querying the tf.data service master for which splits to+process. A few splits will be prefetched to hide the latency of needing to+request a new split from the master.++#### Supported Datasets++Not all dataset sources and transformations are easily splittable. For example,+`take`, `skip`, and `scan` require a global view of the dataset to produce+correct results. Datasets which require multiple input datasets such as `zip`+are also difficult to support, since we don't have a good way of aligning the+splits of multiple input datasets. 
Users who rely on these unsupported datasets+will need to move those datasets to come after the distributed part of their+pipeline.++Initially, we will support splitting for the following dataset sources and+transformations:++*   `batch`, `CsvDataset`, `dense_to_sparse_batch`, `filter`,+    `FixedLengthRecordDataset`, `flat_map`, `from_tensor_slices`,+    `group_by_window`, `ignore_errors`, `interleave`, `list_files`, `map`,+    `range`, `repeat`, `padded_batch`, `prefetch`, `shuffle`, `SSTableDataset`,+    `TextLineDataset`, `TFRecordDataset`, `unbatch`, `window`.++### Master and worker services++This section discusses the design for the master and worker services. These+services are used by the Python API to provide distributed dataset processing,+and these services use the splitting API as a part of their implementation.++#### Master API++The master is responsible for registering datasets, generating and tracking+iteration and worker ids, and generating dataset splits for processing on+workers.++Below is a sketch of the Master API. This API is not public and is subject to+change.++```cpp+/// ---- Methods called by consumers ----++// Registers a dataset and returns an id for the dataset. If the dataset is+// already registered, its dataset id is returned.+int GetOrRegisterDataset(GraphDef dataset);++// Creates and returns `num_consumers` iterator ids which partition the

IIUC, the Split-related classes decide how to split/partition the dataset. How does num_consumers work here?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If+      the iteration was created with `n` consumers, `consumers_index` must be+      less than `n`.++  Returns:+    A Python iterator which iterates over the dataset elements.+  """+```++### Dataset splitting API++To parallelize dataset processing, the tf.data service needs a way to split up+datasets. We will achieve this by adding a splitting API that allows source+datasets to express how they can be split.++Our goals for the API are++*   Performance: The splitting API can be used to performantly split and process

Do we need to consider the data locality of the splits?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for+      producing deterministic results.+    deterministic: Whether the iteration should be performed+      deterministically. Fully deterministic output also requires setting+      `num_tasks` to a fixed number, and that the input dataset is itself+      deterministic.++  Returns:+    An iteration_id which can be used to created iterators via+      `tf.data.experimental.service.make_iterator`+  """++def tf.data.experimental.service.make_iterator(+    dataset, iteration, consumer_index):+  """Creates an iterator for reading from the specified dataset.++  Args:+    dataset: The dataset to read from.+    iteration: An iteration_id object generated by+      `tf.data.experimental.service.create_iteration`.+    consumer_index: The consumer index within the iteration to read from. If

Could you please expand this a bit about how dataset, iteration, split, iterator and consumer_index work with each other?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,+      and any number of consumers can read from that iterator id. The data+      produced by the dataset will be fed to consumers on a first-come+      first-served basis.+    num_tasks: The number of tasks to use for processing. 
Tasks run for+      the duration of an epoch, and each worker should typically process a single+      task. Normally it is best to leave this as None so that the master can+      choose a reasonable number of tasks. Setting `num_tasks` is useful for

How does the master choose the best number of tasks? Do we need a resource configuration file (e.g. CPU and memory) for the master, workers, and consumers?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)

Will the consumer have the same consumer_index in every epoch? If so, how does the consumer get different inputs/splits per epoch during training?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)+    broadcast_send(iteration_id)+  else:+    iteration_id = broadcast_recv()+  it = tf.data.experimental.service.make_iterator(+      ds, iteration_id, consumer_index)+  for element in it:+    # process element++  Args:+    dataset: The dataset to begin iteration over.+    num_consumers: The number of consumers to divide the dataset between. Set+      this if you require determinism. If None, a single iterator id is returned,

Will "a single iterator id" be changed to "a single iteration id"? IIUC, create_iteration(..) will always return a single iteration id regardless of whether num_consumers is None; num_consumers determines the number of iterators.

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:+    # The iteration object is a byte array which needs to be shared among all+    # consumers. Here we suppose there are broadcast_send and broadcast_recv+    # method available.+    iteration_id = tf.data.experimental.service.create_iteration(ds, address, 3)

It looks like the arguments (ds, address, 3) here do not match the signature of create_iteration(dataset, num_consumers=1, num_tasks=None, deterministic=False).

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. 
The number of input workers can be+scaled up or down as needed to keep up with the accelerators.++### Distributed training requires a distribution-aware input pipeline.++Today tf.data supports the tf.distribute API by providing mechanisms for+sharding, cloning, and re-batching. The tf.distribute API uses these primitives+to implement their own version of a distributed dataset. If distributed datasets+become a core feature of tf.data, tf.data can provide a public API for+tf.distribute (and users who wish to implement their own distribution) to use+instead. This will also allow us to support feature requests that require+cross-worker coordination, such as dynamic sharding.++## User Benefit++### Input-bound models++Users with input-bound models can leverage the tf.data service to distribute+input processing across horizontally-scaling compute resources. This can improve+utilization for valuable accelerator resources, reducing total cost.++### Dynamic load balancing++Today, the tf.distribute API statically shards data across accelerators. This+can lead to suboptimal utilization because some shards may contain more data+than others. The tf.data service provides a mechanism for dynamically sharding,+reducing the data imbalance across accelerators.++### Visitation guarantees++Model accuracy can often be improved when each training sample is trained on+exactly once per epoch. The tf.data service can coordinate across workers to+provide this guarantee.++## Design Proposal++The tf.data service is a master-worker system which iterates through datasets,+producing outputs to be consumed by accelerators. 
The service is comprised of a+few components:++*   User-facing Python API for interacting with the tf.data service.+*   Dataset splitting API for determining how to split up datasets for parallel+    processing.+*   Master and worker gRPC services.++### Architecture++The tf.data service is comprised of master and worker gRPC services which could+be run in a couple of different configurations:++#### Glossary++**Master**: The single master coordinating the tf.data service.++**Worker**: A tf.data service worker which performs dataset processing and+provides dataset elements to consumers over RPC.++**Consumer**: A machine which consumes data from the tf.data service. The+consumer may be attached to a GPU or TPU, or use data for on-CPU training.++#### Separate Cluster Architecture++Each server is run on a separate host from the TensorFlow cluster. This+configuration gives users a way to provide horizontally scaling CPU for+processing their input pipelines and quickly feeding data to accelerators.++#### Embedded Cluster Architecture++Each TensorFlow server runs the tf.data worker gRPC service, and one server also+runs the master gRPC service. This lets users leverage the tf.data service+without needing to provision additional compute resources. and gives all the+benefits of the tf.data service except for horizontal scaling.++#### Hybrid Architecture++Users could run tf.data workers embedded in their TensorFlow cluster, and also+run additional tf.data workers (and potentially the tf.data master) outside the+cluster. This allows for horizontal worker scaling, while still leveraging the+compute resources of the TensorFlow cluster for input processing.++### User-facing Python API++This API is how users will interact with the tf.data service from their Python+code.++```python+def tf.data.experimental.service.distribute(address):+  """Marks that a dataset should be processed by the tf.data service.++  ds = ... 
# dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))++  Args:+    address: The address of the tf.data service master.++  Returns:+    A function that can be passed to `dataset.apply()`.+  """++def tf.data.experimental.service.create_iteration(+    dataset, num_consumers=1, num_tasks=None, deterministic=False):+  """Begins distributed iteration over a dataset.++  It is expected that the dataset contains at least one `.distribute(address)`+  transformation, otherwise this method will print a warning and do nothing.++  `create_iteration` will first register the dataset with the tf.data service+  if it isn't already registered. It will then request the creation of+  `num_consumers` dataset iterators which divide the dataset `num_consumers`+  ways. The returned object can be used to read from one of the+  iterators using+  `tf.data.experimental.service.make_iterator(ds, obj, consumer_index)`.++  ds = ... # dataset to distribute+  ds = ds.apply(tf.data.experimental.service.distribute(address))+  if consumer_index == 0:

Does consumer_index == 0 mean the code is running on the master node, which then broadcasts the dataset iterators to all consumers? Or can any server broadcast the dataset iterators?

Would it be better to add some configuration specifying which node (hostname/IP address) is the master, which nodes are workers, and which nodes are consumers?

aaudiber

comment created time in a month

Pull request review commenttensorflow/community

RFC: tf.data Service

+# Distributed tf.data service++| Status        | Proposed          |+| :------------ | :------------------------------------------------------ |+| **RFC #**     | [195](https://github.com/tensorflow/community/pull/195) |+| **Author(s)** | Andrew Audibert (aaudibert@google.com) Rohan Jain (rohanj@google.com) |+| **Sponsor**   | Jiri Simsa (jsimsa@google.com)                          |+| **Updated**   | 2019-01-09                                              |++## Objective++Provide an API and implementation of a tf.data service which can process tf.data+datasets in a distributed manner. The service can be run outside the TensorFlow+cluster or be exported as a gRPC service by TensorFlow servers.++Goals:++-   Enable horizontal scaling of dataset computation to improve performance of+    input-bound dataset pipelines.+-   Improve tf.data integration with the tf.distribute API. In particular,+    support dynamic sharding of data across multiple processes.+-   Provide visitation guarantees for distributed training jobs.++Non-goals:++-   Process non-dataset data.+-   Distribute datasets that rely on external / non-serializable state.+-   Support non-graph computation (e.g. py_function).++## Motivation++### Host machine input pipelines can't always keep up with accelerators.++Some input pipelines require significant resources to produce their data, e.g.+due to image transformations. When the host machine isn't powerful enough to+generate input data at the rate the attached accelerator(s) consume the data,+the accelerator(s) will idle. This slows down training time, and also wastes+valuable accelerator resources. The tf.data service solves this problem by using+N input workers to feed M accelerators. The number of input workers can be

How are workers assigned to accelerators (e.g. when N is not equal to M) in the three different architectures (Separate, Embedded, Hybrid)?

aaudiber

comment created time in a month

issue commenttensorflow/tensorflow

Linking error when building Tensorflow 2.1

I encountered a similar problem on CentOS 7 with Bazel 1.2.1 and GCC 8.3.1. Here is the error message:

ERROR: /home/xxx/.cache/bazel/_bazel_xxx/26d8964cd405c0a736925fad9b93a973/external/swig/BUILD.bazel:5:1: Linking of rule '@swig//:swig' failed (Exit 1)
bazel-out/host/bin/external/swig/_objs/swig/allocate.o:allocate.cxx:function Allocate::~Allocate(): error: undefined reference to 'operator delete(void*, unsigned long)'
bazel-out/host/bin/external/swig/_objs/swig/contract.o:contract.cxx:function Contracts::~Contracts(): error: undefined reference to 'operator delete(void*, unsigned long)'
bazel-out/host/bin/external/swig/_objs/swig/lang.o:lang.cxx:function Language::~Language(): error: undefined reference to 'operator delete(void*, unsigned long)'
bazel-out/host/bin/external/swig/_objs/swig/module.o:module.cxx:function Swig_register_module(char const*, Language* (*)()) [clone .cold.0]: error: undefined reference to 'operator delete(void*, unsigned long)'
DnPlas

comment created time in a month

PR opened tensorflow/tensorflow

Fix the comparison of integer expressions of different signedness

This PR fixes a warning when building TF from source:

tensorflow/core/platform/numbers.cc:65:21: warning: comparison of integer expressions of different signedness: 'int' and 'std::basic_string<char>::size_type' {aka 'long unsigned int'} [-Wsign-compare]
+1 -1

0 comment

1 changed file

pr created time in a month

create barnchfeihugis/tensorflow

branch : int_size_t_comparison

created branch time in a month

push eventIBM/MAX-Question-Answering

Brendan Dwyer

commit sha b76d5c6aad2c73937f41dca45eac627ae027a8b9

Use CDN (#29)

view details

push time in 2 months

PR merged IBM/MAX-Question-Answering

Use CDN
+1 -1

0 comment

1 changed file

bdwyer2

pr closed time in 2 months

push eventIBM/MAX-Inception-ResNet-v2

Brendan Dwyer

commit sha 4090b4788d74b359eba2b5121e07083649d98c7a

Use CDN (#39)

view details

push time in 2 months

PR merged IBM/MAX-Inception-ResNet-v2

Use CDN
+1 -1

0 comment

1 changed file

bdwyer2

pr closed time in 2 months

startedDoctorWkt/acwj

started time in 2 months

PR opened tensorflow/tensorflow

Refactor ParallelInterleaveTest to be parameterized

This PR refactors ParallelInterleaveTest to be parameterized.

+81 -142

0 comment

1 changed file

pr created time in 2 months

push eventfeihugis/tensorflow

Yanan Cao

commit sha b9d33cd9febc9519f7b3dd41389865c57d9037b8

Add legalize TF control flow pass to lowering pipeline PiperOrigin-RevId: 285271143 Change-Id: I02420f7fa996abb9ad8fa26d930324b9a0894730

view details

Scott Main

commit sha 17107c60ce3f3d0ac5ea88834619f87eb12c1273

Fix the broken mnist.py location and enforce use of TF 1.x. PiperOrigin-RevId: 285274547 Change-Id: I841324f257add6972d08bf7adc2cde5b64d96944

view details

Yuanzhong Xu

commit sha 2b70f980ca540bf2be55183b6b35fcbbd64a3be6

[MLIR:TF/XLA] Side-effect-analysis: improve test coverage and minor changes. PiperOrigin-RevId: 285275698 Change-Id: I2a898360c89eb31ad672f003971f2d19b7795a9d

view details

Scott Main

commit sha 9c4b6d9df820669638edecb0e0f147c683c812e5

Remove obsolete pip install; latest 1.x is all we need here PiperOrigin-RevId: 285277629 Change-Id: Ie50b1cef9a1a37cf319052ac611a56c79282f344

view details

Hye Soo Yang

commit sha 356de62421989253daed47b37960b3adcf0774cc

Fix for b/146078486. In py3, range() returns iterable object. PiperOrigin-RevId: 285282691 Change-Id: I4e6520e5fa6e5df62e88c8fb8a0296ba86fb8465

view details

Brian Atkinson

commit sha 2fcfa6085bb2440b0a9619c8037a65a44ee92bdb

Move additional_deps to deps for cuda_py_test. PiperOrigin-RevId: 285283853 Change-Id: I2534d9fb51955cc9a86d1900ec60fc265f451ddc

view details

River Riddle

commit sha 458001c04efd4c62e891b0729a9cf7b1e4d68583

NFC: Cleanup the various Op::print methods. This cleans up the implementation of the various operation print methods. This is done via a combination of code cleanup, adding new streaming methods to the printer(e.g. operand ranges), etc. PiperOrigin-RevId: 285285181 Change-Id: Ia2702f711b8775907e727888d2e32761c5e18f2a

view details

Ken Franko

commit sha cff2d1388566f45dd84e62b34d80bdd6ee0b436d

Update documentation for MaxPooling1D including examples. PiperOrigin-RevId: 285286147 Change-Id: I48babfbe9d4cc89df2b031cd473de45682559f00

view details

Yunxing Dai

commit sha d585ddcf54ebac10eb7feb7c6c89dc3e12b76a73

Limit the number of candidates in multi output fusion. This is so that we can avoid the quadratic behavior. PiperOrigin-RevId: 285286626 Change-Id: I171009f7abc555d09e71d8fe19ee04031ed3d20e

view details

Rick Chao

commit sha e63ae8db2291a0addc15911f9468ff425e2fc87c

Removal of obsolete multi_worker_optimizer_comparison_test. PiperOrigin-RevId: 285287989 Change-Id: I7f4c66c344de33d16c4d572be50aed90a4d8faaf

view details

Brian Atkinson

commit sha 13088cc7ba35153e37c3b0ad7c17f6d969dba1ac

Move additional_deps to deps for cuda_py_test. PiperOrigin-RevId: 285289102 Change-Id: Ic1ca096f27689fda42c9fc98505adbe5b7489993

view details

A. Unique TensorFlower

commit sha 1be7afb1c6f6ffdd37d5bf089f3271bcd118569e

Implement a `build_defs.bzl` shim to simplify new platform porting. This shim accepts a flag, `build_for_embedded`, which enables additional rules besides `native.cc_library` to build the sources specified in the TFLM build graph. PiperOrigin-RevId: 285291476 Change-Id: Ie65a81cff5083b32c5c3f84845c26b02060b481f

view details

Thomas O'Malley

commit sha 7042b781f7cfb99d11e05004ae2cca783b05597e

Add `LossesContainer` class to encapsulate compiled loss computation in v2. PiperOrigin-RevId: 285292927 Change-Id: Ida7450ed0fd7d6f2de37d30fe91f651f127bcbce

view details

Andy Ly

commit sha 1aaabcd65fde6b581fec16a2957ecff7bd69a23e

Fix populating tensors to be passed to InferenceContext in TF MLIR shape inference pass. By preallocating the tensors vector, there will be no undefined behavior (resizing vector) for input tensors being copied and used for shape inference. PiperOrigin-RevId: 285293074 Change-Id: I1ab87f71105a444c5bbb2946dba4a540da8139c0

view details

Pavithra Vijay

commit sha 1f3044b02f3243864e7d636dff9d2a85189382e5

Clarify how the strings 'accuracy', 'acc', 'crossentropy', 'ce' are interpreted in the compile API doc. PiperOrigin-RevId: 285295606 Change-Id: I30e6120cd6303e0b5e9716c613006f09c302c67c

view details

Advait Jain

commit sha 1f958065e245851dd64a21f83c66e21309a5d415

Add issue template for Tensorflow for Microcontrollers. This template: * tags the micro team on github * adds the comp:micro label PiperOrigin-RevId: 285297116 Change-Id: I04304cf1ca1702afc2adf63a66b14c622359872d

view details

Yanan Cao

commit sha 5c18808ff8b848ffba90695a228e9cb6f839f4fb

Propagate shapes from While/If op operands into their single-use function arguments PiperOrigin-RevId: 285298763 Change-Id: I739c25bbb999d58a2b0ee6ea319a611f4485076d

view details

Feng Liu

commit sha ff2f4e3bcc0872a34e9480d19d02d94587fd9d47

Add a debugging op to verify numericals at runtime. The this op takes a built-in option `tolerance`, and if the error of the two operands is larger than this threshold value, a failure will return and the error message will display the mismatched location. NO_IFTTT=this is the first time the op is added, so by default it is in version 1. PiperOrigin-RevId: 285299738 Change-Id: I93e33303d92d4ccdfa7b47f25423cb5dccb27598

view details

Robert David

commit sha 35c31c8a65cff17e2c4c991b57bc5abaa8edab52

Add TF_LITE_ENSUREs after GetVariableTensor calls to check if the tensor is actually a variable tensor. PiperOrigin-RevId: 285300493 Change-Id: I374cbfd3a76d04ab948da1bd3b4a9c3aab061738

view details

Francois Chollet

commit sha 5eb81ea161515e18a039241b3e4eb8efbfbaa354

Unify V1/V2 layer naming in internal imports. PiperOrigin-RevId: 285302761 Change-Id: Ib704512d4076487ff39ededc867532917cbebc52

view details

push time in 2 months

push eventIBM/MAX-Sports-Video-Classifier

Brendan Dwyer

commit sha 2a77640737270f62f25549130e04014537f21c8e

Use CDN (#36)

view details

push time in 2 months

push eventIBM/MAX-Named-Entity-Tagger

Brendan Dwyer

commit sha f6ee99607c66c0604d768b88606e8abbe34ec5b3

Use CDN

view details

Fei Hu

commit sha 4b423e0407fd647474046784e095e34338f621ad

Merge pull request #30 from IBM/cdn Use CDN

view details

push time in 2 months

PR merged IBM/MAX-Named-Entity-Tagger

Reviewers
Use CDN
+1 -1

0 comment

1 changed file

bdwyer2

pr closed time in 2 months

push eventIBM/MAX-Human-Pose-Estimator

Brendan Dwyer

commit sha c5c98a72499fac4f4370305cc97c9fe7dedf46d8

Use CDN (#32)

view details

push time in 2 months

PR merged IBM/MAX-Human-Pose-Estimator

Use CDN
+1 -1

0 comment

1 changed file

bdwyer2

pr closed time in 2 months

startedchubaofs/chubaofs

started time in 2 months

PR closed IBM/MAX-Breast-Cancer-Mitosis-Detector

[ImgBot] Optimize images

Beep boop. Your images are optimized!

Your image file size has been reduced by 35% 🎉

<details> <summary> Details </summary>

File Before After Percent reduction
/docs/swagger-screenshot.png 109.54kb 63.70kb 41.85%
/docs/deploy-max-to-ibm-cloud-with-kubernetes-button.png 55.45kb 42.87kb 22.69%
Total : 164.99kb 106.56kb 35.41%

</details>


📝docs | :octocat: repo | 🙋issues | 🏅swag | 🏪marketplace

+0 -0

0 comment

2 changed files

imgbot[bot]

pr closed time in 2 months

push eventIBM/MAX-Breast-Cancer-Mitosis-Detector

Brendan Dwyer

commit sha b72a9c428d3b071fda5fe78fe142a290cf251e12

Use CDN (#35)

view details

push time in 2 months

startedEbookFoundation/free-programming-books

started time in 2 months

startedmemspace/zefyr

started time in 2 months

startedgiswqs/qgis-earthengine-examples

started time in 2 months

startedIBM/node-red-tensorflowjs

started time in 2 months

fork feihugis/ray

A fast and simple framework for building and running distributed applications. Ray is packaged with RLlib, a scalable reinforcement learning library, and Tune, a scalable hyperparameter tuning library.

https://ray.readthedocs.io/en/latest/

fork in 2 months

startedray-project/ray

started time in 2 months

startedonnx/onnx

started time in 2 months

startedapache/incubator-tvm

started time in 2 months

startedstreamlit/streamlit

started time in 2 months

startedcybertronai/gradient-checkpointing

started time in 3 months

startedawslabs/autogluon

started time in 3 months

startedpytorch/elastic

started time in 3 months

starteddoocs/md

started time in 3 months

pull request commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

@jsimsa Thanks a lot for your help with my PRs! Please feel free to let me know if there is anything else I can help with.

feihugis

comment created time in 3 months

startedstandardnotes/web

started time in 3 months

pull request commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

@rthadur The failed test in Ubuntu CPU is not related. An error happened when migrating the change to the internal tests. Could you please help retrigger the internal tests?

feihugis

comment created time in 3 months

startedapache/incubator-superset

started time in 3 months

startedhorance-liu/clean-cpp

started time in 3 months

pull request commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

@jsimsa Thanks for your suggestion! The test cases for testNumThreadsDeprecated are updated here. Could you please take a look?

feihugis

comment created time in 3 months

push eventfeihugis/tensorflow

Fei Hu

commit sha c45ea870d61ec7146bea04caea6d318dfec6e227

Update the test cases for testNumThreadsDeprecated

view details

push time in 3 months

Pull request review commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

 def get_thread_id(_):       # perform work.       self.assertLessEqual(len(thread_ids), num_threads) -  @parameterized.named_parameters(-      ("1", 1, None),-      ("2", 2, None),-      ("3", 4, None),-      ("4", 8, None),-      ("5", 16, None),-      ("6", 4, -1),-      ("7", 4, 0),-      ("8", 4, 1),-      ("9", 4, 4),-  )+  @combinations.generate(+      combinations.times(+          test_base.default_test_combinations(),+          combinations.combine(+              num_threads=[1, 2, 4, 8, 16], max_intra_op_parallelism=[None]) ++          combinations.combine(+              num_threads=[4], max_intra_op_parallelism=[0, 1, 4]) ++          combinations.combine(

Yeah, sounds good! I will change it to your suggestion.

feihugis

comment created time in 3 months

Pull request review commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

 def get_thread_id(_):       # perform work.       self.assertLessEqual(len(thread_ids), num_threads) -  @parameterized.named_parameters(-      ("1", 1, None),-      ("2", 2, None),-      ("3", 4, None),-      ("4", 8, None),-      ("5", 16, None),-      ("6", 4, -1),-      ("7", 4, 0),-      ("8", 4, 1),-      ("9", 4, 4),-  )+  @combinations.generate(+      combinations.times(+          test_base.default_test_combinations(),+          combinations.combine(+              num_threads=[1, 2, 4, 8, 16], max_intra_op_parallelism=[None]) ++          combinations.combine(+              num_threads=[4], max_intra_op_parallelism=[0, 1, 4]) ++          combinations.combine(

How about this:

  @combinations.generate(
      combinations.times(
          test_base.default_test_combinations(),
          combinations.combine(
              num_threads=[1, 2, 4, 8, 16], max_intra_op_parallelism=[None]) +
          combinations.combine(
              num_threads=[4], max_intra_op_parallelism=[0, 1, 4]) +
          combinations.combine(
              num_threads=[4],
              max_intra_op_parallelism=combinations.NamedObject("MinusOne", -1))
      ))
  def testNumThreadsDeprecated(self, num_threads, max_intra_op_parallelism):
    if isinstance(max_intra_op_parallelism, combinations.NamedObject):
      max_intra_op_parallelism = max_intra_op_parallelism._obj

    def override_threadpool_fn(dataset):
      return threadpool.override_threadpool(
          dataset,
          threadpool.PrivateThreadPool(
              num_threads,
              max_intra_op_parallelism=max_intra_op_parallelism,
              display_name="private_thread_pool_%d" % num_threads))

    self._testNumThreadsHelper(num_threads, override_threadpool_fn)
feihugis

comment created time in 3 months

starteddragen1860/Deep-Learning-with-TensorFlow-book

started time in 3 months

Pull request review commenttensorflow/tensorflow

Switch a bunch of tf.data python tests to use TF combinations

 def get_thread_id(_):       # perform work.       self.assertLessEqual(len(thread_ids), num_threads) -  @parameterized.named_parameters(-      ("1", 1, None),-      ("2", 2, None),-      ("3", 4, None),-      ("4", 8, None),-      ("5", 16, None),-      ("6", 4, -1),-      ("7", 4, 0),-      ("8", 4, 1),-      ("9", 4, 4),-  )+  @combinations.generate(+      combinations.times(+          test_base.default_test_combinations(),+          combinations.combine(+              num_threads=[1, 2, 4, 8, 16], max_intra_op_parallelism=[None]) ++          combinations.combine(+              num_threads=[4], max_intra_op_parallelism=[0, 1, 4]) ++          combinations.combine(

That means it needs to be

combinations.combine(
    num_threads=[4],
    max_intra_op_parallelism=combinations.NamedObject("MinusOne", lambda: -1))
feihugis

comment created time in 3 months

more