import org.tensorflow.op.nn.Relu;
import org.tensorflow.op.nn.Relu6;
import org.tensorflow.op.nn.Selu;
- import org.tensorflow.op.nn.SigmoidCrossEntropyWithLogits;
import org.tensorflow.op.nn.Softmax;
import org.tensorflow.op.nn.SoftmaxCrossEntropyWithLogits;
import org.tensorflow.op.nn.Softsign;
 * @see {@link Ops}
 */
public final class NnOps {
-   public final NnRawOps raw;
-
  private final Scope scope;

  private final Ops ops;

  NnOps(Ops ops) {
    this.scope = ops.scope();
    this.ops = ops;
-     raw = new NnRawOps(ops);
  }

  /**
@@ -1815,55 +1811,6 @@ public <T extends TNumber> Selu<T> selu(Operand<T> features) {
    return Selu.create(scope, features);
  }

-   /**
-    * Computes sigmoid cross entropy given <code>logits</code>.
-    *
-    * <p>Measures the probability error in discrete classification tasks in which each class is
-    * independent and not mutually exclusive. For instance, one could perform multilabel
-    * classification where a picture can contain both an elephant and a dog at the same time.
-    *
-    * <p>For brevity, let <code>x = logits</code>, <code>z = labels</code>. The logistic loss in
-    * pseudo-code is
-    *
-    * <pre>
-    * z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
-    * = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
-    * = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
-    * = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
-    * = (1 - z) * x + log(1 + exp(-x))
-    * = x - x * z + log(1 + exp(-x))
-    * </pre>
-    *
-    * <p>For <code>x < 0</code>, to avoid overflow in <code>exp(-x)</code>, we reformulate the above as
-    *
-    * <pre>
-    * x - x * z + log(1 + exp(-x))
-    * = log(exp(x)) - x * z + log(1 + exp(-x))
-    * = - x * z + log(1 + exp(x))
-    * </pre>
-    *
-    * <p>Hence, to ensure stability and avoid overflow, the implementation uses this equivalent
-    * formulation
-    *
-    * <pre>
-    * max(x, 0) - x * z + log(1 + exp(-abs(x)))
-    * </pre>
-    *
-    * <p><code>logits</code> and <code>labels</code> must have the same type and shape.
-    *
-    * @param labels the labels
-    * @param logits the logits of type float32 or float64
-    * @param <T> the type of labels and logits
-    * @return the component-wise logistic losses.
-    * @throws IllegalArgumentException if logits and labels do not have the same shape
-    */
-   public <T extends TNumber> Operand<T> sigmoidCrossEntropyWithLogits(Operand<T> labels,
-       Operand<T> logits) {
-     return SigmoidCrossEntropyWithLogits.sigmoidCrossEntropyWithLogits(scope, labels, logits);
-   }
-
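For callers migrating off the removed wrapper, the sketch below reproduces the numerically stable formula from the deleted Javadoc using general math ops. It is not part of this commit: the helper name stableSigmoidCrossEntropy is hypothetical, and it assumes tf.zerosLike and the tf.math ops (maximum, mul, sub, add, abs, neg, exp, log1p) are available on an Ops instance.

  // Hypothetical migration sketch: computes
  //   max(x, 0) - x * z + log(1 + exp(-abs(x)))
  // with x = logits, z = labels, mirroring the removed wrapper's stable formulation.
  static <T extends TNumber> Operand<T> stableSigmoidCrossEntropy(
      Ops tf, Operand<T> labels, Operand<T> logits) {
    Operand<T> zeros = tf.zerosLike(logits);
    Operand<T> maxPart = tf.math.maximum(logits, zeros);               // max(x, 0)
    Operand<T> xz = tf.math.mul(logits, labels);                       // x * z
    Operand<T> logPart =
        tf.math.log1p(tf.math.exp(tf.math.neg(tf.math.abs(logits))));  // log(1 + exp(-|x|))
    return tf.math.add(tf.math.sub(maxPart, xz), logPart);
  }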
  /**
   * Computes softmax activations.
   * For each batch {@code i} and class {@code j} we have
@@ -1881,53 +1828,20 @@ public <T extends TNumber> Softmax<T> softmax(Operand<T> logits) {
  }

  /**
-    * Computes softmax cross entropy between <code>logits</code> and <code>labels</code>.
-    *
-    * <p>Measures the probability error in discrete classification tasks in which the classes are
-    * mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
-    * labeled with one and only one label: an image can be a dog or a truck, but not both.
-    *
-    * <p><b>NOTE:</b>
-    *
-    * <p>While the classes are mutually exclusive, their probabilities need not be. All that is
-    * required is that each row of <code>labels</code> is a valid probability distribution. If they
-    * are not, the computation of the gradient will be incorrect.
-    *
-    * <p>If using exclusive <code>labels</code> (wherein one and only one class is true at a time),
-    * see {@link org.tensorflow.op.NnOps#sparseSoftmaxCrossEntropyWithLogits}
-    *
-    * <p>Usage:
-    *
-    * <pre>
-    * Operand<TFloat32> logits =
-    *     tf.constant(new float[][] {{4.0F, 2.0F, 1.0F}, {0.0F, 5.0F, 1.0F}});
-    * Operand<TFloat32> labels =
-    *     tf.constant(new float[][] {{1.0F, 0.0F, 0.0F}, {0.0F, 0.8F, 0.2F}});
-    * Operand<TFloat32> output =
-    *     tf.nn.softmaxCrossEntropyWithLogits(labels, logits, -1);
-    * // output Shape = [2]
-    * // dataType = FLOAT (1)
-    * // values { 0.169846, 0.824745 }
-    * </pre>
-    *
-    * <p>Backpropagation will happen into both <code>logits</code> and <code>labels</code>. To
-    * disallow backpropagation into <code>labels</code>, pass label tensors through <code>
-    * tf.stopGradient</code> before feeding it to this function.
+    * Computes softmax cross entropy cost and gradients to backpropagate.
+    * Inputs are the logits, not probabilities.
    *
-    * @param labels Each vector along the class dimension should hold a valid probability
-    *     distribution e.g. for the case in which labels are of shape <code>[batch_size, num_classes]
-    *     </code>, each row of <code>labels[i]</code> must be a valid probability distribution.
-    * @param logits Per-label activations, typically a linear output. These activation energies are
-    *     interpreted as unnormalized log probabilities.
-    * @param axis The class dimension. -1 is the last dimension.
-    * @param <T> the number type of the operands
-    * @return the softmax cross entropy loss. Its type is the same as <code>logits</code> and its
-    *     shape is the same as <code>labels</code> except that it does not have the last dimension of
-    *     <code>labels</code>.
+    * @param <T> data type for the {@code loss} output and operands
+    * @param features batch_size x num_classes matrix
+    * @param labels batch_size x num_classes matrix.
+    *     The caller must ensure that each batch of labels represents a valid
+    *     probability distribution.
+    * @return a new instance of SoftmaxCrossEntropyWithLogits
   */
-   public <T extends TNumber, U extends TNumber> Operand<T> softmaxCrossEntropyWithLogits(
-       Operand<U> labels, Operand<T> logits, int axis) {
-     return SoftmaxCrossEntropyWithLogits.softmaxCrossEntropyWithLogits(scope, labels, logits, axis);
+   public <T extends TNumber> SoftmaxCrossEntropyWithLogits<T> softmaxCrossEntropyWithLogits(
+       Operand<T> features, Operand<T> labels) {
+     return SoftmaxCrossEntropyWithLogits.create(scope, features, labels);
  }
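A rough usage sketch of the reworked signature (features first, then labels), reusing the constants from the removed Javadoc example above; it assumes the generated op class exposes a loss() accessor for its loss output:

  // Illustrative only, not part of this commit.
  Operand<TFloat32> logits =
      tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
  Operand<TFloat32> labels =
      tf.constant(new float[][] {{1.0f, 0.0f, 0.0f}, {0.0f, 0.8f, 0.2f}});
  SoftmaxCrossEntropyWithLogits<TFloat32> xent =
      tf.nn.softmaxCrossEntropyWithLogits(logits, labels);
  Operand<TFloat32> perExampleLoss = xent.loss();  // shape [2], assumed accessor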

  /**
@@ -2114,50 +2028,23 @@ public <T extends TType> SpaceToDepth<T> spaceToDepth(Operand<T> input, Long blo
  }

  /**
-    * Computes sparse softmax cross entropy between <code>logits</code> and <code>labels</code>.
-    *
-    * <p>Measures the probability error in discrete classification tasks in which the classes are
-    * mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
-    * labeled with one and only one label: an image can be a dog or a truck, but not both.
-    *
-    * <p><b>NOTE:</b>
-    *
-    * <p>For this operation, the probability of a given label is considered exclusive. That is, soft
-    * classes are not allowed, and the <code>labels</code> vector must provide a single specific
-    * index for the true class for each row of <code>logits</code> (each minibatch entry). For soft
-    * softmax classification with a probability distribution for each entry, see {@link
-    * org.tensorflow.op.NnOps#softmaxCrossEntropyWithLogits}.
-    *
-    * <p><b>WARNING:</b>
+    * Computes softmax cross entropy cost and gradients to backpropagate.
+    * Unlike {@code SoftmaxCrossEntropyWithLogits}, this operation does not accept
+    * a matrix of label probabilities, but rather a single label per row
+    * of features. This label is considered to have probability 1.0 for the
+    * given row.
+    * <p>Inputs are the logits, not probabilities.
    *
-    * <p>This op expects unscaled logits, since it performs a <code>softmax</code> on <code>logits
-    * </code> internally for efficiency. Do not call this op with the output of <code>softmax</code>,
-    * as it will produce incorrect results.
-    *
-    * <p>A common use case is to have logits of shape <code>[batchSize, numClasses]</code> and have
-    * labels of shape <code>[batchSize]</code>, but higher dimensions are supported, in which case
-    * the <code>dim</code>-th dimension is assumed to be of size <code>numClasses</code>. <code>
-    * logits</code> must have the <code>dataType</code> of <code>TFloat16</code>, <code>TFloat32</code>,
-    * or <code>TFloat64</code>, and <code>labels</code> must have the dtype of <code>TInt32</code>
-    * or <code>TInt64</code>.
-    *
-    * @param labels <code>Tensor</code> of shape <code>[d_0, d_1, ..., d_{r-1}]</code> (where <code>r
-    *     </code> is rank of <code>labels</code> and result) and the dataType is <code>TInt32</code>
-    *     or <code>TInt64</code>. Each entry in <code>labels</code> must be an index in <code>[0,
-    *     numClasses)</code>. Other values will raise an exception when this op is run on CPU, and
-    *     return <code>NaN</code> for corresponding loss and gradient rows on GPU.
-    * @param logits Per-label activations (typically a linear output) of shape <code>[d_0, d_1, ...,
-    *     d_{r-1}, numClasses]</code> and dataType of <code>TFloat16</code>, <code>TFloat32</code>,
-    *     or <code>TFloat64</code>. These activation energies are interpreted as unnormalized log
-    *     probabilities.
-    * @return A <code>Tensor</code> of the same shape as <code>labels</code> and of the same type as
-    *     <code>logits</code> with the softmax cross entropy loss.
-    * @throws IllegalArgumentException If logits are scalars (need to have rank >= 1) or if the rank
-    *     of the labels is not equal to the rank of the logits minus one.
-    */
-   public <T extends TNumber, U extends TNumber> Operand sparseSoftmaxCrossEntropyWithLogits(
-       Operand<T> labels, Operand<U> logits) {
-     return SparseSoftmaxCrossEntropyWithLogits.sparseSoftmaxCrossEntropyWithLogits(scope, labels, logits);
+    * @param <T> data type for the {@code loss} output and operands
+    * @param features batch_size x num_classes matrix
+    * @param labels batch_size vector with values in [0, num_classes).
+    *     This is the label for the given minibatch entry.
+    * @return a new instance of SparseSoftmaxCrossEntropyWithLogits
+    */
+   public <T extends TNumber> SparseSoftmaxCrossEntropyWithLogits<T> sparseSoftmaxCrossEntropyWithLogits(
+       Operand<T> features, Operand<? extends TNumber> labels) {
+     return SparseSoftmaxCrossEntropyWithLogits.create(scope, features, labels);
  }

  /**
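A similar hypothetical sketch for the sparse variant, which takes a single class index per minibatch row instead of a probability distribution (again assuming a loss() accessor on the generated op):

  // Illustrative only, not part of this commit.
  Operand<TFloat32> logits =
      tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
  Operand<TInt32> classIndices = tf.constant(new int[] {0, 1});
  SparseSoftmaxCrossEntropyWithLogits<TFloat32> sparseXent =
      tf.nn.sparseSoftmaxCrossEntropyWithLogits(logits, classIndices);
  Operand<TFloat32> perExampleLoss = sparseXent.loss();  // shape [2], assumed accessor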