Benchmarks for naive_bayes
BernoulliNB-newsgroups
Benchmark setup
from sklearn.naive_bayes import BernoulliNB
from deps import load_data
kwargs = {'binarize': 1}
X, y, X_t, y_t = load_data('newsgroups')
obj = BernoulliNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
398 function calls in 0.667 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.667 0.667 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.667 0.667 <f>:1(<module>)
1 0.282 0.282 0.667 0.667 /tmp/vb_sklearn/sklearn/naive_bayes.py:217(fit)
1 0.000 0.000 0.316 0.316 /tmp/vb_sklearn/sklearn/naive_bayes.py:428(_count)
1 0.005 0.005 0.211 0.211 /tmp/vb_sklearn/sklearn/naive_bayes.py:280(_count)
1 0.000 0.000 0.178 0.178 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.000 0.000 0.178 0.178 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:302(__rmul__)
1 0.000 0.000 0.177 0.177 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:229(__mul__)
1 0.000 0.000 0.177 0.177 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:263(_mul_multivector)
1 0.000 0.000 0.163 0.163 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csc.py:216(csc_matvecs)
1 0.163 0.163 0.163 0.163 {_csc.csc_matvecs}
1 0.076 0.076 0.105 0.105 /tmp/vb_sklearn/sklearn/preprocessing.py:406(binarize)
1 0.000 0.000 0.061 0.061 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
1 0.000 0.000 0.056 0.056 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:281(tocsr)
1 0.000 0.000 0.040 0.040 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/coo.py:75(coo_tocsr)
1 0.040 0.040 0.040 0.040 {_coo.coo_tocsr}
4 0.030 0.008 0.030 0.008 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.029 0.029 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
1 0.000 0.000 0.029 0.029 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:59(copy)
1 0.000 0.000 0.025 0.025 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
1 0.000 0.000 0.017 0.017 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:643(_with_data)
3 0.017 0.006 0.017 0.006 {method 'copy' of 'numpy.ndarray' objects}
2 0.015 0.008 0.015 0.008 {numpy.core.multiarray.zeros}
1 0.000 0.000 0.015 0.015 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:567(sum_duplicates)
3 0.000 0.000 0.012 0.004 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:20(__init__)
1 0.011 0.011 0.011 0.011 {method 'astype' of 'numpy.ndarray' objects}
1 0.000 0.000 0.010 0.010 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:567(csr_sum_duplicates)
1 0.010 0.010 0.010 0.010 {_csr.csr_sum_duplicates}
1 0.000 0.000 0.008 0.008 /tmp/vb_sklearn/sklearn/base.py:332(fit_transform)
1 0.006 0.006 0.007 0.007 /tmp/vb_sklearn/sklearn/preprocessing.py:719(transform)
1 0.000 0.000 0.005 0.005 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:613(sort_indices)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:581(__get_sorted)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:310(sum)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:85(csr_has_sorted_indices)
1 0.005 0.005 0.005 0.005 {_csr.csr_has_sorted_indices}
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1508(any)
1 0.003 0.003 0.003 0.003 {method 'any' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/preprocessing.py:695(fit)
1 0.000 0.000 0.001 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
1 0.001 0.001 0.001 0.001 {method 'sort' of 'numpy.ndarray' objects}
3 0.000 0.000 0.001 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:101(check_format)
10 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
12 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:622(prune)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:107(transpose)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:18(upcast)
25 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:508(_is_multilabel)
23 0.000 0.000 0.000 0.000 {isinstance}
24 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/abc.py:128(__instancecheck__)
12 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:77(isscalarlike)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:194(getnnz)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.empty}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:17(__init__)
9 0.000 0.000 0.000 0.000 {hasattr}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:96(isshape)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:50(to_native)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:59(set_shape)
7 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:370(__getattr__)
56 0.000 0.000 0.000 0.000 {len}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:51(__init__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:124(isdense)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/_weakrefset.py:68(__contains__)
16 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:85(getnnz)
20 0.000 0.000 0.000 0.000 {numpy.core.multiarray.can_cast}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:1574(isscalar)
21 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:81(get_shape)
21 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2116(rank)
3 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:504(_is_label_indicator_matrix)
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:180(_swap)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:54(getdtype)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:684(__init__)
2 0.000 0.000 0.000 0.000 {method 'transpose' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:173(__len__)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:20(_get_dtype)
2 0.000 0.000 0.000 0.000 {method 'ravel' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 {getattr}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csc.py:173(_swap)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:691(_check_fitted)
1 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:129(tocsr)
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.798803 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 67008 67008.0 8.4 X = atleast2d_or_csr(X)
242
243 1 19 19.0 0.0 labelbin = LabelBinarizer()
244 1 17919 17919.0 2.2 Y = labelbin.fit_transform(y)
245 1 5 5.0 0.0 self.classes_ = labelbin.classes_
246 1 4 4.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 12 12.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 3 3.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 3 3.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 990 990.0 0.1 y_freq = Y.sum(axis=0)
268 1 57 57.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 374347 374347.0 46.9 N_c, N_c_i = self._count(X, Y)
273
274 1 312674 312674.0 39.1 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 22 22.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 25729 25729.0 3.2 + self.alpha * X.shape[1]))
277
278 1 4 4.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
320 function calls in 0.905 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.905 0.905 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.905 0.905 <f>:1(<module>)
1 0.000 0.000 0.905 0.905 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.546 0.546 0.904 0.904 /tmp/vb_sklearn/sklearn/naive_bayes.py:433(_joint_log_likelihood)
2 0.000 0.000 0.236 0.118 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
2 0.000 0.000 0.236 0.118 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:229(__mul__)
2 0.000 0.000 0.236 0.118 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:263(_mul_multivector)
2 0.000 0.000 0.235 0.118 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:320(csr_matvecs)
2 0.235 0.118 0.235 0.118 {_csr.csr_matvecs}
1 0.047 0.047 0.054 0.054 /tmp/vb_sklearn/sklearn/preprocessing.py:406(binarize)
2 0.040 0.020 0.040 0.020 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.031 0.031 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
1 0.000 0.000 0.028 0.028 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:281(tocsr)
1 0.000 0.000 0.018 0.018 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/coo.py:75(coo_tocsr)
1 0.018 0.018 0.018 0.018 {_coo.coo_tocsr}
1 0.000 0.000 0.009 0.009 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:567(sum_duplicates)
1 0.000 0.000 0.007 0.007 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
1 0.000 0.000 0.007 0.007 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:59(copy)
1 0.000 0.000 0.006 0.006 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:567(csr_sum_duplicates)
1 0.006 0.006 0.006 0.006 {_csr.csr_sum_duplicates}
1 0.000 0.000 0.004 0.004 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:643(_with_data)
3 0.004 0.001 0.004 0.001 {method 'copy' of 'numpy.ndarray' objects}
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:310(sum)
2 0.000 0.000 0.003 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:20(__init__)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:613(sort_indices)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:581(__get_sorted)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:85(csr_has_sorted_indices)
1 0.003 0.003 0.003 0.003 {_csr.csr_has_sorted_indices}
1 0.002 0.002 0.002 0.002 {method 'astype' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.001 0.001 0.001 0.001 {method 'argmax' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:101(check_format)
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:18(upcast)
9 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:622(prune)
11 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
22 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:77(isscalarlike)
9 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:194(getnnz)
22 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
17 0.000 0.000 0.000 0.000 {isinstance}
30 0.000 0.000 0.000 0.000 {numpy.core.multiarray.can_cast}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:50(to_native)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:17(__init__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:96(isshape)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:124(isdense)
5 0.000 0.000 0.000 0.000 {hasattr}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:59(set_shape)
20 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:81(get_shape)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.empty}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:1574(isscalar)
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
12 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:85(getnnz)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:51(__init__)
37 0.000 0.000 0.000 0.000 {len}
18 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2116(rank)
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:180(_swap)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:20(_get_dtype)
2 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:370(__getattr__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:54(getdtype)
4 0.000 0.000 0.000 0.000 {method 'ravel' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 {getattr}
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:129(tocsr)
1 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.921006 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 920111 920111.0 99.9 jll = self._joint_log_likelihood(X)
60 1 895 895.0 0.1 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.798803 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 67008 67008.0 8.4 X = atleast2d_or_csr(X)
242
243 1 19 19.0 0.0 labelbin = LabelBinarizer()
244 1 17919 17919.0 2.2 Y = labelbin.fit_transform(y)
245 1 5 5.0 0.0 self.classes_ = labelbin.classes_
246 1 4 4.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 12 12.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 3 3.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 3 3.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 990 990.0 0.1 y_freq = Y.sum(axis=0)
268 1 57 57.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 374347 374347.0 46.9 N_c, N_c_i = self._count(X, Y)
273
274 1 312674 312674.0 39.1 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 22 22.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 25729 25729.0 3.2 + self.alpha * X.shape[1]))
277
278 1 4 4.0 0.0 return self
BernoulliNB-madelon
Benchmark setup
from sklearn.naive_bayes import BernoulliNB
from deps import load_data
kwargs = {'binarize': 500}
X, y, X_t, y_t = load_data('madelon')
obj = BernoulliNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
119 function calls in 0.198 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.198 0.198 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.198 0.198 <f>:1(<module>)
1 0.000 0.000 0.198 0.198 /tmp/vb_sklearn/sklearn/naive_bayes.py:217(fit)
1 0.000 0.000 0.194 0.194 /tmp/vb_sklearn/sklearn/naive_bayes.py:428(_count)
1 0.179 0.179 0.184 0.184 /tmp/vb_sklearn/sklearn/preprocessing.py:406(binarize)
1 0.003 0.003 0.010 0.010 /tmp/vb_sklearn/sklearn/naive_bayes.py:280(_count)
1 0.000 0.000 0.006 0.006 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
1 0.006 0.006 0.006 0.006 {method 'copy' of 'numpy.ndarray' objects}
1 0.000 0.000 0.005 0.005 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.005 0.005 0.005 0.005 {numpy.core._dotblas.dot}
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
4 0.003 0.001 0.003 0.001 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1508(any)
1 0.002 0.002 0.002 0.002 {method 'any' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/base.py:332(fit_transform)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:695(fit)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:719(transform)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
7 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
7 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:508(_is_multilabel)
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
12 0.000 0.000 0.000 0.000 {isinstance}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
1 0.000 0.000 0.000 0.000 {method 'sort' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/abc.py:128(__instancecheck__)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
14 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/_weakrefset.py:68(__contains__)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:504(_is_label_indicator_matrix)
4 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
7 0.000 0.000 0.000 0.000 {hasattr}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
1 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:684(__init__)
10 0.000 0.000 0.000 0.000 {len}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:691(_check_fitted)
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
2 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
2 0.000 0.000 0.000 0.000 {getattr}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.187327 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 2876 2876.0 1.5 X = atleast2d_or_csr(X)
242
243 1 15 15.0 0.0 labelbin = LabelBinarizer()
244 1 580 580.0 0.3 Y = labelbin.fit_transform(y)
245 1 3 3.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 1 59 59.0 0.0 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 5 5.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 3 3.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 4 4.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 41 41.0 0.0 y_freq = Y.sum(axis=0)
268 1 46 46.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 183449 183449.0 97.9 N_c, N_c_i = self._count(X, Y)
273
274 1 168 168.0 0.1 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 10 10.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 55 55.0 0.0 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
82 function calls in 0.057 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.057 0.057 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.057 0.057 <f>:1(<module>)
1 0.000 0.000 0.057 0.057 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.000 0.000 0.057 0.057 /tmp/vb_sklearn/sklearn/naive_bayes.py:433(_joint_log_likelihood)
1 0.051 0.051 0.052 0.052 /tmp/vb_sklearn/sklearn/preprocessing.py:406(binarize)
2 0.000 0.000 0.004 0.002 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
2 0.004 0.002 0.004 0.002 {numpy.core._dotblas.dot}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
2 0.001 0.000 0.001 0.000 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
1 0.001 0.001 0.001 0.001 {method 'copy' of 'numpy.ndarray' objects}
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
1 0.000 0.000 0.000 0.000 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
16 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
8 0.000 0.000 0.000 0.000 {isinstance}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
4 0.000 0.000 0.000 0.000 {hasattr}
4 0.000 0.000 0.000 0.000 {len}
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
2 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.057355 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 57295 57295.0 99.9 jll = self._joint_log_likelihood(X)
60 1 60 60.0 0.1 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.187327 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 2876 2876.0 1.5 X = atleast2d_or_csr(X)
242
243 1 15 15.0 0.0 labelbin = LabelBinarizer()
244 1 580 580.0 0.3 Y = labelbin.fit_transform(y)
245 1 3 3.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 1 59 59.0 0.0 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 5 5.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 3 3.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 4 4.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 41 41.0 0.0 y_freq = Y.sum(axis=0)
268 1 46 46.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 183449 183449.0 97.9 N_c, N_c_i = self._count(X, Y)
273
274 1 168 168.0 0.1 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 10 10.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 55 55.0 0.0 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
MultinomialNB-newsgroups
Benchmark setup
from sklearn.naive_bayes import MultinomialNB
from deps import load_data
kwargs = {}
X, y, X_t, y_t = load_data('newsgroups')
obj = MultinomialNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
312 function calls in 0.575 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.575 0.575 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.575 0.575 <f>:1(<module>)
1 0.298 0.298 0.575 0.575 /tmp/vb_sklearn/sklearn/naive_bayes.py:217(fit)
1 0.006 0.006 0.207 0.207 /tmp/vb_sklearn/sklearn/naive_bayes.py:280(_count)
1 0.000 0.000 0.174 0.174 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.000 0.000 0.173 0.173 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:302(__rmul__)
1 0.000 0.000 0.173 0.173 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:229(__mul__)
1 0.000 0.000 0.173 0.173 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:263(_mul_multivector)
1 0.000 0.000 0.159 0.159 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csc.py:216(csc_matvecs)
1 0.159 0.159 0.159 0.159 {_csc.csc_matvecs}
1 0.000 0.000 0.060 0.060 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
1 0.000 0.000 0.055 0.055 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:281(tocsr)
1 0.000 0.000 0.040 0.040 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/coo.py:75(coo_tocsr)
1 0.040 0.040 0.040 0.040 {_coo.coo_tocsr}
4 0.030 0.008 0.030 0.008 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.025 0.025 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
2 0.015 0.007 0.015 0.007 {numpy.core.multiarray.zeros}
1 0.000 0.000 0.015 0.015 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:567(sum_duplicates)
1 0.000 0.000 0.010 0.010 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:567(csr_sum_duplicates)
1 0.010 0.010 0.010 0.010 {_csr.csr_sum_duplicates}
1 0.000 0.000 0.008 0.008 /tmp/vb_sklearn/sklearn/base.py:332(fit_transform)
1 0.005 0.005 0.007 0.007 /tmp/vb_sklearn/sklearn/preprocessing.py:719(transform)
1 0.000 0.000 0.005 0.005 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:613(sort_indices)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:581(__get_sorted)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:310(sum)
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:85(csr_has_sorted_indices)
1 0.005 0.005 0.005 0.005 {_csr.csr_has_sorted_indices}
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1508(any)
1 0.003 0.003 0.003 0.003 {method 'any' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/preprocessing.py:695(fit)
1 0.000 0.000 0.001 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
1 0.001 0.001 0.001 0.001 {method 'sort' of 'numpy.ndarray' objects}
2 0.000 0.000 0.001 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:20(__init__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:101(check_format)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:107(transpose)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:622(prune)
7 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
9 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:18(upcast)
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:508(_is_multilabel)
19 0.000 0.000 0.000 0.000 {isinstance}
18 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/abc.py:128(__instancecheck__)
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:77(isscalarlike)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:194(getnnz)
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.empty}
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
18 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:96(isshape)
5 0.000 0.000 0.000 0.000 {hasattr}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:17(__init__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:50(to_native)
7 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:370(__getattr__)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/_weakrefset.py:68(__contains__)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:124(isdense)
45 0.000 0.000 0.000 0.000 {len}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:59(set_shape)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:1574(isscalar)
20 0.000 0.000 0.000 0.000 {numpy.core.multiarray.can_cast}
18 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2116(rank)
12 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:85(getnnz)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:51(__init__)
3 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:504(_is_label_indicator_matrix)
14 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:81(get_shape)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
5 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:180(_swap)
1 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:684(__init__)
2 0.000 0.000 0.000 0.000 {method 'transpose' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:54(getdtype)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:173(__len__)
3 0.000 0.000 0.000 0.000 {getattr}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:691(_check_fitted)
2 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:20(_get_dtype)
2 0.000 0.000 0.000 0.000 {method 'ravel' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csc.py:173(_swap)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.632248 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 34970 34970.0 5.5 X = atleast2d_or_csr(X)
242
243 1 13 13.0 0.0 labelbin = LabelBinarizer()
244 1 4543 4543.0 0.7 Y = labelbin.fit_transform(y)
245 1 2 2.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 2 2.0 0.0 if Y.shape[1] == 1:
248 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 7 7.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 2 2.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 1 1.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 2 2.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 680 680.0 0.1 y_freq = Y.sum(axis=0)
268 1 38 38.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 248547 248547.0 39.3 N_c, N_c_i = self._count(X, Y)
273
274 1 335076 335076.0 53.0 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 16 16.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 8343 8343.0 1.3 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
184 function calls in 0.087 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.087 0.087 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.087 0.087 <f>:1(<module>)
1 0.000 0.000 0.087 0.087 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.000 0.000 0.086 0.086 /tmp/vb_sklearn/sklearn/naive_bayes.py:363(_joint_log_likelihood)
1 0.000 0.000 0.060 0.060 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.000 0.000 0.060 0.060 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:229(__mul__)
1 0.000 0.000 0.060 0.060 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:263(_mul_multivector)
1 0.000 0.000 0.060 0.060 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:320(csr_matvecs)
1 0.060 0.060 0.060 0.060 {_csr.csr_matvecs}
1 0.000 0.000 0.026 0.026 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
1 0.000 0.000 0.024 0.024 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:281(tocsr)
1 0.000 0.000 0.018 0.018 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/coo.py:75(coo_tocsr)
1 0.018 0.018 0.018 0.018 {_coo.coo_tocsr}
1 0.000 0.000 0.005 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:567(sum_duplicates)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:567(csr_sum_duplicates)
1 0.003 0.003 0.003 0.003 {_csr.csr_sum_duplicates}
1 0.000 0.000 0.002 0.002 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:310(sum)
1 0.002 0.002 0.002 0.002 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:613(sort_indices)
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:581(__get_sorted)
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sparsetools/csr.py:85(csr_has_sorted_indices)
1 0.002 0.002 0.002 0.002 {_csr.csr_has_sorted_indices}
1 0.000 0.000 0.001 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.001 0.001 0.001 0.001 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:20(__init__)
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:18(upcast)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:101(check_format)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:622(prune)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
11 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
5 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/coo.py:194(getnnz)
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.empty}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:17(__init__)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:77(isscalarlike)
20 0.000 0.000 0.000 0.000 {numpy.core.multiarray.can_cast}
8 0.000 0.000 0.000 0.000 {isinstance}
26 0.000 0.000 0.000 0.000 {len}
15 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2116(rank)
10 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:96(isshape)
1 0.000 0.000 0.000 0.000 {hasattr}
8 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/compressed.py:85(getnnz)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:51(__init__)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:124(isdense)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:50(to_native)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:59(set_shape)
9 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:81(get_shape)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:1574(isscalar)
5 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/csr.py:180(_swap)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:370(__getattr__)
3 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/data.py:20(_get_dtype)
1 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:54(getdtype)
2 0.000 0.000 0.000 0.000 {method 'ravel' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 {getattr}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.189377 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 188362 188362.0 99.5 jll = self._joint_log_likelihood(X)
60 1 1015 1015.0 0.5 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.632248 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 34970 34970.0 5.5 X = atleast2d_or_csr(X)
242
243 1 13 13.0 0.0 labelbin = LabelBinarizer()
244 1 4543 4543.0 0.7 Y = labelbin.fit_transform(y)
245 1 2 2.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 2 2.0 0.0 if Y.shape[1] == 1:
248 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 7 7.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 2 2.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 1 1.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 2 2.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 680 680.0 0.1 y_freq = Y.sum(axis=0)
268 1 38 38.0 0.0 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 248547 248547.0 39.3 N_c, N_c_i = self._count(X, Y)
273
274 1 335076 335076.0 53.0 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 16 16.0 0.0 - np.log(N_c.reshape(-1, 1)
276 1 8343 8343.0 1.3 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
MultinomialNB-madelon¶
Benchmark setup
from sklearn.naive_bayes import MultinomialNB
from deps import load_data
kwargs = {}
X, y, X_t, y_t = load_data('madelon')
obj = MultinomialNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
91 function calls in 0.014 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.014 0.014 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.014 0.014 <f>:1(<module>)
1 0.000 0.000 0.014 0.014 /tmp/vb_sklearn/sklearn/naive_bayes.py:217(fit)
1 0.003 0.003 0.010 0.010 /tmp/vb_sklearn/sklearn/naive_bayes.py:280(_count)
1 0.000 0.000 0.005 0.005 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.005 0.005 0.005 0.005 {numpy.core._dotblas.dot}
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
4 0.003 0.001 0.003 0.001 {method 'sum' of 'numpy.ndarray' objects}
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.000 0.000 0.002 0.002 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1508(any)
1 0.002 0.002 0.002 0.002 {method 'any' of 'numpy.ndarray' objects}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/base.py:332(fit_transform)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:695(fit)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:719(transform)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:508(_is_multilabel)
5 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
10 0.000 0.000 0.000 0.000 {isinstance}
5 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
1 0.000 0.000 0.000 0.000 {method 'sort' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/abc.py:128(__instancecheck__)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/_weakrefset.py:68(__contains__)
10 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:504(_is_label_indicator_matrix)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:684(__init__)
1 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}
9 0.000 0.000 0.000 0.000 {len}
3 0.000 0.000 0.000 0.000 {hasattr}
2 0.000 0.000 0.000 0.000 {getattr}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/preprocessing.py:691(_check_fitted)
1 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.013302 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 2834 2834.0 21.3 X = atleast2d_or_csr(X)
242
243 1 16 16.0 0.1 labelbin = LabelBinarizer()
244 1 587 587.0 4.4 Y = labelbin.fit_transform(y)
245 1 4 4.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 1 57 57.0 0.4 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 5 5.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 2 2.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 3 3.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 35 35.0 0.3 y_freq = Y.sum(axis=0)
268 1 47 47.0 0.4 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 9475 9475.0 71.2 N_c, N_c_i = self._count(X, Y)
273
274 1 165 165.0 1.2 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 9 9.0 0.1 - np.log(N_c.reshape(-1, 1)
276 1 50 50.0 0.4 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
42 function calls in 0.003 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.003 0.003 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.003 0.003 <f>:1(<module>)
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.000 0.000 0.003 0.003 /tmp/vb_sklearn/sklearn/naive_bayes.py:363(_joint_log_likelihood)
1 0.000 0.000 0.002 0.002 /tmp/vb_sklearn/sklearn/utils/extmath.py:70(safe_sparse_dot)
1 0.002 0.002 0.002 0.002 {numpy.core._dotblas.dot}
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/utils/validation.py:70(atleast2d_or_csr)
1 0.000 0.000 0.001 0.001 /tmp/vb_sklearn/sklearn/utils/validation.py:10(assert_all_finite)
1 0.001 0.001 0.001 0.001 {method 'sum' of 'numpy.ndarray' objects}
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
4 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.000 0.000 0.000 0.000 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
8 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
4 0.000 0.000 0.000 0.000 {isinstance}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
3 0.000 0.000 0.000 0.000 {len}
1 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.003145 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 3085 3085.0 98.1 jll = self._joint_log_likelihood(X)
60 1 60 60.0 1.9 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 217
Total time: 0.013302 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
217 def fit(self, X, y, sample_weight=None, class_prior=None):
218 """Fit Naive Bayes classifier according to X, y
219
220 Parameters
221 ----------
222 X : {array-like, sparse matrix}, shape = [n_samples, n_features]
223 Training vectors, where n_samples is the number of samples and
224 n_features is the number of features.
225
226 y : array-like, shape = [n_samples]
227 Target values.
228
229 sample_weight : array-like, shape = [n_samples], optional
230 Weights applied to individual samples (1. for unweighted).
231
232 class_prior : array, shape [n_classes]
233 Custom prior probability per class.
234 Overrides the fit_prior parameter.
235
236 Returns
237 -------
238 self : object
239 Returns self.
240 """
241 1 2834 2834.0 21.3 X = atleast2d_or_csr(X)
242
243 1 16 16.0 0.1 labelbin = LabelBinarizer()
244 1 587 587.0 4.4 Y = labelbin.fit_transform(y)
245 1 4 4.0 0.0 self.classes_ = labelbin.classes_
246 1 3 3.0 0.0 n_classes = len(self.classes_)
247 1 4 4.0 0.0 if Y.shape[1] == 1:
248 1 57 57.0 0.4 Y = np.concatenate((1 - Y, Y), axis=1)
249
250 1 5 5.0 0.0 if X.shape[0] != Y.shape[0]:
251 msg = "X and y have incompatible shapes."
252 if issparse(X):
253 msg += "\nNote: Sparse matrices cannot be indexed w/ boolean \
254 masks (use `indices=True` in CV)."
255 raise ValueError(msg)
256
257 1 3 3.0 0.0 if sample_weight is not None:
258 Y *= array2d(sample_weight).T
259
260 1 2 2.0 0.0 if class_prior:
261 if len(class_prior) != n_classes:
262 raise ValueError(
263 "Number of priors must match number of classes")
264 self.class_log_prior_ = np.log(class_prior)
265 1 3 3.0 0.0 elif self.fit_prior:
266 # empirical prior, with sample_weight taken into account
267 1 35 35.0 0.3 y_freq = Y.sum(axis=0)
268 1 47 47.0 0.4 self.class_log_prior_ = np.log(y_freq) - np.log(y_freq.sum())
269 else:
270 self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
271
272 1 9475 9475.0 71.2 N_c, N_c_i = self._count(X, Y)
273
274 1 165 165.0 1.2 self.feature_log_prob_ = (np.log(N_c_i + self.alpha)
275 1 9 9.0 0.1 - np.log(N_c.reshape(-1, 1)
276 1 50 50.0 0.4 + self.alpha * X.shape[1]))
277
278 1 3 3.0 0.0 return self
GaussianNB-arcene¶
Benchmark setup
from sklearn.naive_bayes import GaussianNB from deps import load_data kwargs = {} X, y, X_t, y_t = load_data('arcene') obj = GaussianNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
56 function calls in 0.077 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.077 0.077 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.077 0.077 <f>:1(<module>)
1 0.063 0.063 0.077 0.077 /tmp/vb_sklearn/sklearn/naive_bayes.py:135(fit)
2 0.000 0.000 0.011 0.005 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2470(var)
2 0.011 0.005 0.011 0.005 {method 'var' of 'numpy.ndarray' objects}
2 0.000 0.000 0.002 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2299(mean)
2 0.002 0.001 0.002 0.001 {method 'mean' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
2 0.000 0.000 0.000 0.000 {method 'sum' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
3 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
4 0.000 0.000 0.000 0.000 {isinstance}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
6 0.000 0.000 0.000 0.000 {hasattr}
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 {method 'sort' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
4 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
1 0.000 0.000 0.000 0.000 {len}
2 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 135
Total time: 0.073624 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
135 def fit(self, X, y):
136 """Fit Gaussian Naive Bayes according to X, y
137
138 Parameters
139 ----------
140 X : array-like, shape = [n_samples, n_features]
141 Training vectors, where n_samples is the number of samples
142 and n_features is the number of features.
143
144 y : array-like, shape = [n_samples]
145 Target values.
146
147 Returns
148 -------
149 self : object
150 Returns self.
151 """
152
153 1 77 77.0 0.1 X, y = check_arrays(X, y, sparse_format='dense')
154
155 1 2 2.0 0.0 n_samples, n_features = X.shape
156
157 1 2 2.0 0.0 if n_samples != y.shape[0]:
158 raise ValueError("X and y have incompatible shapes")
159
160 1 59 59.0 0.1 self.classes_ = unique_y = np.unique(y)
161 1 3 3.0 0.0 n_classes = unique_y.shape[0]
162
163 1 23 23.0 0.0 self.theta_ = np.zeros((n_classes, n_features))
164 1 19 19.0 0.0 self.sigma_ = np.zeros((n_classes, n_features))
165 1 6 6.0 0.0 self.class_prior_ = np.zeros(n_classes)
166 1 2 2.0 0.0 epsilon = 1e-9
167 3 56 18.7 0.1 for i, y_i in enumerate(unique_y):
168 2 33507 16753.5 45.5 self.theta_[i, :] = np.mean(X[y == y_i, :], axis=0)
169 2 39765 19882.5 54.0 self.sigma_[i, :] = np.var(X[y == y_i, :], axis=0) + epsilon
170 2 101 50.5 0.1 self.class_prior_[i] = np.float(np.sum(y == y_i)) / n_samples
171 1 2 2.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
38 function calls in 0.036 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.036 0.036 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.036 0.036 <f>:1(<module>)
1 0.000 0.000 0.036 0.036 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.033 0.033 0.036 0.036 /tmp/vb_sklearn/sklearn/naive_bayes.py:173(_joint_log_likelihood)
4 0.000 0.000 0.003 0.001 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
4 0.003 0.001 0.003 0.001 {method 'sum' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
5 0.000 0.000 0.000 0.000 {isinstance}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
1 0.000 0.000 0.000 0.000 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
3 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
2 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
3 0.000 0.000 0.000 0.000 {len}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2162(size)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.035567 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 35524 35524.0 99.9 jll = self._joint_log_likelihood(X)
60 1 43 43.0 0.1 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 135
Total time: 0.073624 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
135 def fit(self, X, y):
136 """Fit Gaussian Naive Bayes according to X, y
137
138 Parameters
139 ----------
140 X : array-like, shape = [n_samples, n_features]
141 Training vectors, where n_samples is the number of samples
142 and n_features is the number of features.
143
144 y : array-like, shape = [n_samples]
145 Target values.
146
147 Returns
148 -------
149 self : object
150 Returns self.
151 """
152
153 1 77 77.0 0.1 X, y = check_arrays(X, y, sparse_format='dense')
154
155 1 2 2.0 0.0 n_samples, n_features = X.shape
156
157 1 2 2.0 0.0 if n_samples != y.shape[0]:
158 raise ValueError("X and y have incompatible shapes")
159
160 1 59 59.0 0.1 self.classes_ = unique_y = np.unique(y)
161 1 3 3.0 0.0 n_classes = unique_y.shape[0]
162
163 1 23 23.0 0.0 self.theta_ = np.zeros((n_classes, n_features))
164 1 19 19.0 0.0 self.sigma_ = np.zeros((n_classes, n_features))
165 1 6 6.0 0.0 self.class_prior_ = np.zeros(n_classes)
166 1 2 2.0 0.0 epsilon = 1e-9
167 3 56 18.7 0.1 for i, y_i in enumerate(unique_y):
168 2 33507 16753.5 45.5 self.theta_[i, :] = np.mean(X[y == y_i, :], axis=0)
169 2 39765 19882.5 54.0 self.sigma_[i, :] = np.var(X[y == y_i, :], axis=0) + epsilon
170 2 101 50.5 0.1 self.class_prior_[i] = np.float(np.sum(y == y_i)) / n_samples
171 1 2 2.0 0.0 return self
GaussianNB-madelon¶
Benchmark setup
from sklearn.naive_bayes import GaussianNB from deps import load_data kwargs = {} X, y, X_t, y_t = load_data('madelon') obj = GaussianNB(**kwargs)
Benchmark statement
obj.fit(X, y)
Execution time
Memory usage
Additional output
cProfile
56 function calls in 0.217 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.217 0.217 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.217 0.217 <f>:1(<module>)
1 0.114 0.114 0.217 0.217 /tmp/vb_sklearn/sklearn/naive_bayes.py:135(fit)
2 0.000 0.000 0.072 0.036 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2470(var)
2 0.072 0.036 0.072 0.036 {method 'var' of 'numpy.ndarray' objects}
2 0.000 0.000 0.031 0.015 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2299(mean)
2 0.031 0.015 0.031 0.015 {method 'mean' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:90(unique)
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:94(check_arrays)
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
1 0.000 0.000 0.000 0.000 {method 'sort' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
2 0.000 0.000 0.000 0.000 {method 'sum' of 'numpy.ndarray' objects}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
3 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:87(_num_samples)
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
4 0.000 0.000 0.000 0.000 {isinstance}
2 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
1 0.000 0.000 0.000 0.000 {method 'flatten' of 'numpy.ndarray' objects}
4 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
6 0.000 0.000 0.000 0.000 {hasattr}
2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
2 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {len}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 jll = self._joint_log_likelihood(X)
60 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 135
Total time: 0.211534 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
135 def fit(self, X, y):
136 """Fit Gaussian Naive Bayes according to X, y
137
138 Parameters
139 ----------
140 X : array-like, shape = [n_samples, n_features]
141 Training vectors, where n_samples is the number of samples
142 and n_features is the number of features.
143
144 y : array-like, shape = [n_samples]
145 Target values.
146
147 Returns
148 -------
149 self : object
150 Returns self.
151 """
152
153 1 129 129.0 0.1 X, y = check_arrays(X, y, sparse_format='dense')
154
155 1 3 3.0 0.0 n_samples, n_features = X.shape
156
157 1 3 3.0 0.0 if n_samples != y.shape[0]:
158 raise ValueError("X and y have incompatible shapes")
159
160 1 189 189.0 0.1 self.classes_ = unique_y = np.unique(y)
161 1 4 4.0 0.0 n_classes = unique_y.shape[0]
162
163 1 9 9.0 0.0 self.theta_ = np.zeros((n_classes, n_features))
164 1 6 6.0 0.0 self.sigma_ = np.zeros((n_classes, n_features))
165 1 7 7.0 0.0 self.class_prior_ = np.zeros(n_classes)
166 1 3 3.0 0.0 epsilon = 1e-9
167 3 23 7.7 0.0 for i, y_i in enumerate(unique_y):
168 2 86406 43203.0 40.8 self.theta_[i, :] = np.mean(X[y == y_i, :], axis=0)
169 2 124576 62288.0 58.9 self.sigma_[i, :] = np.var(X[y == y_i, :], axis=0) + epsilon
170 2 173 86.5 0.1 self.class_prior_[i] = np.float(np.sum(y == y_i)) / n_samples
171 1 3 3.0 0.0 return self
Benchmark statement
obj.predict(X_t)
Execution time
Memory usage
Additional output
cProfile
38 function calls in 0.018 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.018 0.018 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/vbench/benchmark.py:286(f)
1 0.000 0.000 0.018 0.018 <f>:1(<module>)
1 0.000 0.000 0.018 0.018 /tmp/vb_sklearn/sklearn/naive_bayes.py:46(predict)
1 0.016 0.016 0.018 0.018 /tmp/vb_sklearn/sklearn/naive_bayes.py:173(_joint_log_likelihood)
4 0.000 0.000 0.002 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1379(sum)
4 0.002 0.000 0.002 0.000 {method 'sum' of 'numpy.ndarray' objects}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
1 0.000 0.000 0.000 0.000 /tmp/vb_sklearn/sklearn/utils/validation.py:62(array2d)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:683(argmax)
1 0.000 0.000 0.000 0.000 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/base.py:553(isspmatrix)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/shape_base.py:58(atleast_2d)
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/scipy/sparse/sputils.py:116(_isinstance)
5 0.000 0.000 0.000 0.000 {isinstance}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:237(asanyarray)
2 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/numeric.py:167(asarray)
3 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 /home/slave/virtualenvs/cpython-2.7.2/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2162(size)
3 0.000 0.000 0.000 0.000 {len}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
LineProfiler
Timer unit: 1e-06 s
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: predict at line 46
Total time: 0.018545 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
46 def predict(self, X):
47 """
48 Perform classification on an array of test vectors X.
49
50 Parameters
51 ----------
52 X : array-like, shape = [n_samples, n_features]
53
54 Returns
55 -------
56 C : array, shape = [n_samples]
57 Predicted target values for X
58 """
59 1 18468 18468.0 99.6 jll = self._joint_log_likelihood(X)
60 1 77 77.0 0.4 return self.classes_[np.argmax(jll, axis=1)]
File: /tmp/vb_sklearn/sklearn/naive_bayes.py
Function: fit at line 135
Total time: 0.211534 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
135 def fit(self, X, y):
136 """Fit Gaussian Naive Bayes according to X, y
137
138 Parameters
139 ----------
140 X : array-like, shape = [n_samples, n_features]
141 Training vectors, where n_samples is the number of samples
142 and n_features is the number of features.
143
144 y : array-like, shape = [n_samples]
145 Target values.
146
147 Returns
148 -------
149 self : object
150 Returns self.
151 """
152
153 1 129 129.0 0.1 X, y = check_arrays(X, y, sparse_format='dense')
154
155 1 3 3.0 0.0 n_samples, n_features = X.shape
156
157 1 3 3.0 0.0 if n_samples != y.shape[0]:
158 raise ValueError("X and y have incompatible shapes")
159
160 1 189 189.0 0.1 self.classes_ = unique_y = np.unique(y)
161 1 4 4.0 0.0 n_classes = unique_y.shape[0]
162
163 1 9 9.0 0.0 self.theta_ = np.zeros((n_classes, n_features))
164 1 6 6.0 0.0 self.sigma_ = np.zeros((n_classes, n_features))
165 1 7 7.0 0.0 self.class_prior_ = np.zeros(n_classes)
166 1 3 3.0 0.0 epsilon = 1e-9
167 3 23 7.7 0.0 for i, y_i in enumerate(unique_y):
168 2 86406 43203.0 40.8 self.theta_[i, :] = np.mean(X[y == y_i, :], axis=0)
169 2 124576 62288.0 58.9 self.sigma_[i, :] = np.var(X[y == y_i, :], axis=0) + epsilon
170 2 173 86.5 0.1 self.class_prior_[i] = np.float(np.sum(y == y_i)) / n_samples
171 1 3 3.0 0.0 return self