# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import typing try: from paddle.cuda_env import * # noqa: F403 from paddle.version import ( # noqa: F401 commit as __git_commit__, full_version as __version__, ) except ImportError: import sys sys.stderr.write( '''Warning with import paddle: you should not import paddle from the source directory; please install paddlepaddle*.whl firstly.''' ) # NOTE(SigureMo): We should place the import of base.core before other modules, # because there are some initialization codes in base/core/__init__.py. from .base import core # noqa: F401 from .batch import batch # Do the *DUPLICATED* monkey-patch for the tensor object. # We need remove the duplicated code here once we fix # the illogical implement in the monkey-patch methods later. from .framework import monkey_patch_math_tensor, monkey_patch_variable from .pir import monkey_patch_dtype, monkey_patch_program, monkey_patch_value monkey_patch_variable() monkey_patch_math_tensor() monkey_patch_value() monkey_patch_program() monkey_patch_dtype() from .base.dataset import * # noqa: F403 from .framework import ( disable_signal_handler, disable_static, enable_static, get_flags, in_dynamic_mode, set_flags, ) from .framework.dtype import ( bfloat16, bool, complex64, complex128, dtype, finfo, float16, float32, float64, iinfo, int8, int16, int32, int64, uint8, ) if typing.TYPE_CHECKING: from .tensor.tensor import Tensor else: Tensor = framework.core.eager.Tensor Tensor.__qualname__ = 'Tensor' import paddle.distributed.fleet import paddle.text import paddle.vision from paddle import ( # noqa: F401 amp, audio, autograd, dataset, decomposition, device, distributed, distribution, geometric, incubate, inference, io, jit, metric, nn, onnx, optimizer, quantization, reader, regularizer, sparse, static, sysconfig, vision, ) # high-level api from . import ( # noqa: F401 _pir_ops, _typing as _typing, callbacks, fft, hub, linalg, signal, ) from .autograd import ( enable_grad, grad, is_grad_enabled, no_grad, set_grad_enabled, ) from .device import ( # noqa: F401 get_cudnn_version, get_device, is_compiled_with_cinn, is_compiled_with_cuda, is_compiled_with_custom_device, is_compiled_with_distribute, is_compiled_with_ipu, is_compiled_with_rocm, is_compiled_with_xpu, set_device, ) from .distributed import DataParallel from .framework import ( # noqa: F401 CPUPlace, CUDAPinnedPlace, CUDAPlace, CustomPlace, IPUPlace, ParamAttr, XPUPlace, async_save, clear_async_save_task_queue, get_default_dtype, load, save, set_default_dtype, ) from .framework.random import ( get_cuda_rng_state, get_rng_state, seed, set_cuda_rng_state, set_rng_state, ) from .hapi import ( Model, flops, summary, ) from .nn.functional.distance import ( pdist, ) from .nn.initializer.lazy_init import LazyGuard from .tensor.attribute import ( imag, is_complex, is_floating_point, is_integer, rank, real, shape, ) from .tensor.creation import ( arange, assign, cauchy_, clone, complex, create_parameter, diag, diag_embed, diagflat, empty, empty_like, eye, full, full_like, geometric_, linspace, logspace, meshgrid, ones, ones_like, polar, to_tensor, tril, tril_, tril_indices, triu, triu_, triu_indices, zeros, zeros_like, ) from .tensor.einsum import einsum from .tensor.linalg import ( # noqa: F401 bincount, bmm, cdist, cholesky, cross, dist, dot, eigvalsh, histogram, histogramdd, matmul, mv, norm, t, t_, transpose, transpose_, ) from .tensor.logic import ( allclose, bitwise_and, bitwise_and_, bitwise_not, bitwise_not_, bitwise_or, bitwise_or_, bitwise_xor, bitwise_xor_, equal, equal_, equal_all, greater_equal, greater_equal_, greater_than, greater_than_, is_empty, is_tensor, isclose, less_equal, less_equal_, less_than, less_than_, logical_and, logical_and_, logical_not, logical_not_, logical_or, logical_or_, logical_xor, logical_xor_, # noqa: F401 not_equal, not_equal_, # noqa: F401 ) from .tensor.manipulation import ( as_complex, as_real, as_strided, atleast_1d, atleast_2d, atleast_3d, block_diag, broadcast_tensors, broadcast_to, cast, cast_, chunk, column_stack, concat, crop, diagonal_scatter, dsplit, dstack, expand, expand_as, flatten, flatten_, flip, flip as reverse, gather, gather_nd, hsplit, hstack, index_add, index_add_, index_fill, index_fill_, index_put, index_put_, masked_fill, masked_fill_, masked_scatter, masked_scatter_, moveaxis, put_along_axis, repeat_interleave, reshape, reshape_, roll, rot90, row_stack, scatter, scatter_, scatter_nd, scatter_nd_add, select_scatter, shard_index, slice, slice_scatter, split, squeeze, squeeze_, stack, strided_slice, take_along_axis, tensor_split, tensordot, tile, tolist, unbind, unflatten, unfold, unique, unique_consecutive, unsqueeze, unsqueeze_, unstack, view, view_as, vsplit, vstack, ) from .tensor.math import ( # noqa: F401 abs, abs_, acos, acos_, acosh, acosh_, add, add_n, addmm, addmm_, all, amax, amin, angle, any, asin, asin_, asinh, asinh_, atan, atan2, atan_, atanh, atanh_, bitwise_left_shift, bitwise_left_shift_, bitwise_right_shift, bitwise_right_shift_, broadcast_shape, ceil, clip, combinations, conj, copysign, copysign_, cos, cos_, cosh, cosh_, count_nonzero, cummax, cummin, cumprod, cumprod_, cumsum, cumsum_, cumulative_trapezoid, deg2rad, diagonal, diff, digamma, digamma_, divide, divide_, erf, erf_, erfinv, exp, expm1, expm1_, floor, floor_divide, floor_divide_, floor_mod, floor_mod_, fmax, fmin, frac, frac_, frexp, gammainc, gammainc_, gammaincc, gammaincc_, gammaln, gammaln_, gcd, gcd_, heaviside, hypot, hypot_, i0, i0_, i0e, i1, i1e, increment, inner, inverse, isfinite, isin, isinf, isnan, isneginf, isposinf, isreal, kron, lcm, lcm_, ldexp, ldexp_, lerp, lgamma, lgamma_, log, log1p, log1p_, log2, log2_, log10, log10_, log_, logaddexp, logcumsumexp, logit, logit_, logsumexp, max, maximum, min, minimum, mm, mod, mod_, multigammaln, multigammaln_, multiplex, multiply, multiply_, nan_to_num, nan_to_num_, nanmean, nansum, neg, neg_, nextafter, outer, polygamma, polygamma_, pow, pow_, prod, rad2deg, reciprocal, reduce_as, remainder, remainder_, renorm, renorm_, round, rsqrt, scale, sgn, sign, signbit, sin, sin_, sinc, sinc_, sinh, sinh_, sqrt, square, square_, stanh, subtract, sum, take, tan, tan_, tanh, tanh_, trace, trapezoid, trunc, trunc_, vander, ) from .tensor.random import ( bernoulli, bernoulli_, binomial, check_shape, multinomial, normal, normal_, poisson, rand, randint, randint_like, randn, randperm, standard_gamma, standard_normal, uniform, ) from .tensor.search import ( argmax, argmin, argsort, bucketize, index_sample, index_select, kthvalue, masked_select, mode, nonzero, searchsorted, sort, topk, where, where_, ) from .tensor.stat import ( mean, median, nanmedian, nanquantile, numel, quantile, std, var, ) from .tensor.to_string import set_printoptions # CINN has to set a flag to include a lib if is_compiled_with_cinn(): import os package_dir = os.path.dirname(os.path.abspath(__file__)) runtime_include_dir = os.path.join(package_dir, "libs") cuh_file = os.path.join(runtime_include_dir, "cinn_cuda_runtime_source.cuh") if os.path.exists(cuh_file): os.environ.setdefault('runtime_include_dir', runtime_include_dir) if is_compiled_with_cuda(): import os import platform if ( platform.system() == 'Linux' and platform.machine() == 'x86_64' and paddle.version.with_pip_cuda_libraries == 'ON' ): package_dir = os.path.dirname(os.path.abspath(__file__)) nvidia_package_path = package_dir + "/.." + "/nvidia" set_flags({"FLAGS_nvidia_package_dir": nvidia_package_path}) cublas_lib_path = package_dir + "/.." + "/nvidia/cublas/lib" set_flags({"FLAGS_cublas_dir": cublas_lib_path}) cudnn_lib_path = package_dir + "/.." + "/nvidia/cudnn/lib" set_flags({"FLAGS_cudnn_dir": cudnn_lib_path}) curand_lib_path = package_dir + "/.." + "/nvidia/curand/lib" set_flags({"FLAGS_curand_dir": curand_lib_path}) cusolver_lib_path = package_dir + "/.." + "/nvidia/cusolver/lib" set_flags({"FLAGS_cusolver_dir": cusolver_lib_path}) cusparse_lib_path = package_dir + "/.." + "/nvidia/cusparse/lib" set_flags({"FLAGS_cusparse_dir": cusparse_lib_path}) nccl_lib_path = package_dir + "/.." + "/nvidia/nccl/lib" set_flags({"FLAGS_nccl_dir": nccl_lib_path}) cupti_dir_lib_path = package_dir + "/.." + "/nvidia/cuda_cupti/lib" set_flags({"FLAGS_cupti_dir": cupti_dir_lib_path}) elif ( platform.system() == 'Windows' and platform.machine() in ('x86_64', 'AMD64') and paddle.version.with_pip_cuda_libraries == 'ON' ): package_dir = os.path.dirname(os.path.abspath(__file__)) win_cuda_bin_path = package_dir + "\\.." + "\\nvidia" set_flags({"FLAGS_win_cuda_bin_dir": win_cuda_bin_path}) import sys if sys.platform == 'win32': pfiles_path = os.getenv('ProgramFiles', 'C:\\Program Files') py_dll_path = os.path.join(sys.exec_prefix, 'Library', 'bin') th_dll_path = os.path.join(os.path.dirname(__file__), 'libs') site_cuda_base_path = os.path.join( os.path.dirname(__file__), '..', 'nvidia' ) site_cuda_list = [ "cublas", "cuda_nvrtc", "cuda_runtime", "cudnn", "cufft", "curand", "cusolver", "cusparse", "nvjitlink", ] if sys.exec_prefix != sys.base_exec_prefix: base_py_dll_path = os.path.join( sys.base_exec_prefix, 'Library', 'bin' ) else: base_py_dll_path = '' dll_paths = list( filter( os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path] ) ) for site_cuda_package in site_cuda_list: site_cuda_path = os.path.join( site_cuda_base_path, site_cuda_package, 'bin' ) if os.path.exists(site_cuda_path): dll_paths.append(site_cuda_path) import ctypes kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) with_load_library_flags = hasattr(kernel32, 'AddDllDirectory') prev_error_mode = kernel32.SetErrorMode(0x0001) kernel32.LoadLibraryW.restype = ctypes.c_void_p if with_load_library_flags: kernel32.LoadLibraryExW.restype = ctypes.c_void_p for dll_path in dll_paths: os.add_dll_directory(dll_path) try: ctypes.CDLL('vcruntime140.dll') ctypes.CDLL('msvcp140.dll') ctypes.CDLL('vcruntime140_1.dll') except OSError: import logging logging.error( '''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure. It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe''' ) import glob dlls = glob.glob(os.path.join(th_dll_path, '*.dll')) for site_cuda_package in site_cuda_list: site_cuda_path = os.path.join( site_cuda_base_path, site_cuda_package, 'bin' ) if os.path.exists(site_cuda_path): dlls.extend( glob.glob(os.path.join(site_cuda_path, '*.dll')) ) # Not load 32 bit dlls in 64 bit python. dlls = [dll for dll in dlls if '32_' not in dll] path_patched = False for dll in dlls: is_loaded = False if with_load_library_flags: res = kernel32.LoadLibraryExW(dll, None, 0x00001100) last_error = ctypes.get_last_error() if res is None and last_error != 126: err = ctypes.WinError(last_error) err.strerror += f' Error loading "{dll}" or one of its dependencies.' raise err elif res is not None: is_loaded = True if not is_loaded: if not path_patched: prev_path = os.environ['PATH'] os.environ['PATH'] = ';'.join( dll_paths + [os.environ['PATH']] ) path_patched = True res = kernel32.LoadLibraryW(dll) if path_patched: os.environ['PATH'] = prev_path if res is None: err = ctypes.WinError(ctypes.get_last_error()) err.strerror += f' Error loading "{dll}" or one of its dependencies.' raise err kernel32.SetErrorMode(prev_error_mode) disable_static() from .pir_utils import IrGuard ir_guard = IrGuard() ir_guard._switch_to_pir() __all__ = [ 'block_diag', 'iinfo', 'finfo', 'dtype', 'uint8', 'int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bfloat16', 'bool', 'complex64', 'complex128', 'addmm', 'addmm_', 'allclose', 'isclose', 't', 't_', 'add', 'subtract', 'diag', 'diagflat', 'diag_embed', 'isnan', 'scatter_nd_add', 'unstack', 'get_default_dtype', 'save', 'multinomial', 'get_cuda_rng_state', 'get_rng_state', 'rank', 'empty_like', 'eye', 'cumsum', 'cumsum_', 'cummax', 'cummin', 'cumprod', 'cumprod_', 'logaddexp', 'logcumsumexp', 'logit', 'logit_', 'LazyGuard', 'sign', 'is_empty', 'equal', 'equal_', 'equal_all', 'is_tensor', 'is_complex', 'is_integer', 'cross', 'where', 'where_', 'log1p', 'cos', 'cos_', 'tan', 'tan_', 'mean', 'mode', 'mv', 'in_dynamic_mode', 'min', 'amin', 'any', 'slice', 'slice_scatter', 'normal', 'normal_', 'logsumexp', 'full', 'unsqueeze', 'unsqueeze_', 'argmax', 'Model', 'summary', 'flops', 'sort', 'searchsorted', 'bucketize', 'split', 'tensor_split', 'hsplit', 'dsplit', 'vsplit', 'logical_and', 'logical_and_', 'full_like', 'less_than', 'less_than_', 'kron', 'clip', 'Tensor', 'crop', 'ParamAttr', 'stanh', 'randint', 'randint_like', 'assign', 'gather', 'scale', 'zeros', 'rsqrt', 'squeeze', 'squeeze_', 'to_tensor', 'gather_nd', 'isin', 'isinf', 'isneginf', 'isposinf', 'isreal', 'uniform', 'floor_divide', 'floor_divide_', 'remainder', 'remainder_', 'floor_mod', 'floor_mod_', 'roll', 'batch', 'max', 'amax', 'logical_or', 'logical_or_', 'bitwise_and', 'bitwise_and_', 'bitwise_or', 'bitwise_or_', 'bitwise_xor', 'bitwise_xor_', 'bitwise_not', 'bitwise_not_', 'mm', 'flip', 'rot90', 'bincount', 'histogram', 'histogramdd', 'multiplex', 'CUDAPlace', 'empty', 'shape', 'real', 'imag', 'is_floating_point', 'complex', 'reciprocal', 'rand', 'less_equal', 'less_equal_', 'triu', 'triu_', 'sin', 'sin_', 'dist', 'cdist', 'pdist', 'unbind', 'meshgrid', 'arange', 'load', 'numel', 'median', 'nanmedian', 'quantile', 'nanquantile', 'no_grad', 'enable_grad', 'set_grad_enabled', 'is_grad_enabled', 'mod', 'mod_', 'abs', 'abs_', 'tril', 'tril_', 'pow', 'pow_', 'zeros_like', 'maximum', 'topk', 'index_select', 'CPUPlace', 'matmul', 'seed', 'acos', 'acos_', 'logical_xor', 'exp', 'expm1', 'expm1_', 'bernoulli', 'bernoulli_', 'binomial', 'poisson', 'standard_gamma', 'sinh', 'sinh_', 'sinc', 'sinc_', 'round', 'DataParallel', 'argmin', 'prod', 'broadcast_shape', 'conj', 'neg', 'neg_', 'lgamma', 'lgamma_', 'gammaincc', 'gammaincc_', 'gammainc', 'gammainc_', 'lerp', 'erfinv', 'inner', 'outer', 'square', 'square_', 'divide', 'divide_', 'gammaln', 'gammaln_', 'ceil', 'atan', 'atan_', 'atan2', 'rad2deg', 'deg2rad', 'gcd', 'gcd_', 'lcm', 'lcm_', 'expand', 'broadcast_to', 'ones_like', 'index_sample', 'cast', 'cast_', 'grad', 'all', 'ones', 'not_equal', 'sum', 'reduce_as', 'nansum', 'nanmean', 'count_nonzero', 'tile', 'greater_equal', 'greater_equal_', 'isfinite', 'create_parameter', 'dot', 'increment', 'erf', 'erf_', 'bmm', 'chunk', 'tolist', 'tensordot', 'greater_than', 'greater_than_', 'shard_index', 'argsort', 'tanh', 'tanh_', 'transpose', 'transpose_', 'cauchy_', 'geometric_', 'randn', 'strided_slice', 'unique', 'unique_consecutive', 'set_cuda_rng_state', 'set_rng_state', 'set_printoptions', 'std', 'flatten', 'flatten_', 'asin', 'multiply', 'multiply_', 'disable_static', 'masked_select', 'var', 'trace', 'enable_static', 'scatter_nd', 'set_default_dtype', 'disable_signal_handler', 'expand_as', 'stack', 'hstack', 'vstack', 'dstack', 'column_stack', 'row_stack', 'sqrt', 'randperm', 'linspace', 'logspace', 'reshape', 'reshape_', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'reverse', 'nonzero', 'CUDAPinnedPlace', 'logical_not', 'logical_not_', 'add_n', 'minimum', 'scatter', 'scatter_', 'floor', 'cosh', 'log', 'log_', 'log2', 'log2_', 'log10', 'log10_', 'concat', 'check_shape', 'trunc', 'trunc_', 'frac', 'frac_', 'digamma', 'digamma_', 'standard_normal', 'diagonal', 'broadcast_tensors', 'einsum', 'set_flags', 'get_flags', 'asinh', 'acosh', 'atanh', 'as_complex', 'as_real', 'diff', 'angle', 'fmax', 'fmin', 'moveaxis', 'repeat_interleave', 'clone', 'kthvalue', 'renorm', 'renorm_', 'take_along_axis', 'put_along_axis', 'select_scatter', 'multigammaln', 'multigammaln_', 'nan_to_num', 'nan_to_num_', 'heaviside', 'tril_indices', 'index_add', "index_add_", "index_put", "index_put_", 'sgn', 'triu_indices', 'take', 'frexp', 'ldexp', 'ldexp_', 'trapezoid', 'cumulative_trapezoid', 'polar', 'vander', 'unflatten', 'as_strided', 'view', 'view_as', 'unfold', 'nextafter', 'i0', 'i0_', 'i0e', 'i1', 'i1e', 'polygamma', 'polygamma_', 'copysign', 'copysign_', 'bitwise_left_shift', 'bitwise_left_shift_', 'bitwise_right_shift', 'bitwise_right_shift_', 'masked_fill', 'masked_fill_', 'masked_scatter', 'masked_scatter_', 'hypot', 'hypot_', 'index_fill', "index_fill_", 'diagonal_scatter', 'combinations', 'signbit', ]