libcudf  24.02.00
error.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/detail/utilities/stacktrace.hpp>
20 
21 #include <cuda.h>
22 #include <cuda_runtime_api.h>
23 #include <stdexcept>
24 #include <string>
25 #include <type_traits>
26 
27 namespace cudf {
39  // Exclude the current stackframe, as it is this constructor.
40  : _stacktrace{cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO)}
41  {
42  }
43 
44  public:
50  char const* stacktrace() const { return _stacktrace.c_str(); }
51 
52  protected:
53  std::string const _stacktrace;
54 };
55 
62 struct logic_error : public std::logic_error, public stacktrace_recorder {
68  logic_error(char const* const message) : std::logic_error(message) {}
69 
75  logic_error(std::string const& message) : std::logic_error(message) {}
76 
77  // TODO Add an error code member? This would be useful for translating an
78  // exception to an error code in a pure-C API
79 
80  ~logic_error()
81  {
82  // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed'
83  // from a host+device function marking the implicit version also as host+device
84  }
85 };
90 struct cuda_error : public std::runtime_error, public stacktrace_recorder {
97  cuda_error(std::string const& message, cudaError_t const& error)
98  : std::runtime_error(message), _cudaError(error)
99  {
100  }
101 
102  public:
108  cudaError_t error_code() const { return _cudaError; }
109 
110  protected:
111  cudaError_t _cudaError;
112 };
113 
114 struct fatal_cuda_error : public cuda_error {
115  using cuda_error::cuda_error; // Inherit constructors
116 };
117 
125 struct data_type_error : public std::invalid_argument, public stacktrace_recorder {
131  data_type_error(char const* const message) : std::invalid_argument(message) {}
132 
138  data_type_error(std::string const& message) : std::invalid_argument(message) {}
139 };
142 } // namespace cudf
143 
// Two-level stringification. `STRINGIFY_DETAIL` turns its argument into a string literal
// verbatim; `CUDF_STRINGIFY` goes through it so that macro arguments such as `__LINE__` are
// expanded to their value before being stringified.
#define STRINGIFY_DETAIL(token) #token
#define CUDF_STRINGIFY(token)   STRINGIFY_DETAIL(token)
146 
/**
 * @brief Checks a (pre-)condition and throws an exception when it does not hold.
 *
 * Accepts two or three arguments:
 *   - the condition to check (anything contextually convertible to `bool`);
 *   - a string literal giving the reason; it is concatenated at compile time with the
 *     `"CUDF failure at: <file>:<line>: "` prefix to form the exception message;
 *   - optionally, the exception type to throw, which must derive from `std::exception`.
 *     When omitted, `cudf::logic_error` is thrown.
 *
 * Example usage:
 * @code
 * CUDF_EXPECTS(p != nullptr, "Unexpected null pointer");                      // cudf::logic_error
 * CUDF_EXPECTS(p != nullptr, "Unexpected null pointer", std::runtime_error);  // std::runtime_error
 * @endcode
 */
#define CUDF_EXPECTS(...)                                             \
  GET_CUDF_EXPECTS_MACRO(__VA_ARGS__, CUDF_EXPECTS_3, CUDF_EXPECTS_2) \
  (__VA_ARGS__)

// Argument-counting helper: with three user arguments NAME resolves to CUDF_EXPECTS_3, with two
// it resolves to CUDF_EXPECTS_2.
#define GET_CUDF_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME

// Three-argument form: throws `_exception_type` when `_condition` converts to false.
#define CUDF_EXPECTS_3(_condition, _reason, _exception_type)                      \
  do {                                                                            \
    static_assert(std::is_base_of_v<std::exception, _exception_type>);            \
    if (!static_cast<bool>(_condition)) {                                         \
      throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/                \
        {"CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _reason}; \
    }                                                                             \
  } while (0)

// Two-argument form: same check, throwing the default `cudf::logic_error`.
#define CUDF_EXPECTS_2(_condition, _reason) CUDF_EXPECTS_3(_condition, _reason, cudf::logic_error)
193 
195 
/**
 * @brief Unconditionally throws, indicating that an erroneous code path has been taken.
 *
 * Accepts one or two arguments:
 *   - a string literal describing the failure; it is concatenated at compile time with the
 *     `"CUDF failure at: <file>:<line>: "` prefix to form the exception message;
 *   - optionally, the exception type to throw. When omitted, `cudf::logic_error` is thrown.
 *
 * Example usage:
 * @code
 * CUDF_FAIL("Unsupported code path");                      // throws cudf::logic_error
 * CUDF_FAIL("Unsupported code path", std::runtime_error);  // throws std::runtime_error
 * @endcode
 */
#define CUDF_FAIL(...)                                       \
  GET_CUDF_FAIL_MACRO(__VA_ARGS__, CUDF_FAIL_2, CUDF_FAIL_1) \
  (__VA_ARGS__)

// Argument-counting helper: with two user arguments NAME resolves to CUDF_FAIL_2, with one it
// resolves to CUDF_FAIL_1.
#define GET_CUDF_FAIL_MACRO(_1, _2, NAME, ...) NAME

// Two-argument form. The prefix is "CUDF failure at: " (with the trailing space) so the message
// has the same "<prefix><file>:<line>: <reason>" layout as the one produced by CUDF_EXPECTS.
#define CUDF_FAIL_2(_what, _exception_type)      \
  /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \
  throw _exception_type { "CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _what }

// One-argument form: throws the default `cudf::logic_error`.
#define CUDF_FAIL_1(_what) CUDF_FAIL_2(_what, cudf::logic_error)
228 
230 
231 namespace cudf {
232 namespace detail {
233 // @cond
234 inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int line)
235 {
236  // Calls cudaGetLastError to clear the error status. It is nearly certain that a fatal error
237  // occurred if it still returns the same error after a cleanup.
238  cudaGetLastError();
239  auto const last = cudaFree(0);
240  auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
241  std::to_string(line) + ": " + std::to_string(error) + " " +
242  cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
243  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
244  // between two calls.
245  if (error == last && last == cudaDeviceSynchronize()) {
246  throw fatal_cuda_error{"Fatal " + msg, error};
247  } else {
248  throw cuda_error{msg, error};
249  }
250 }
251 // @endcond
252 } // namespace detail
253 } // namespace cudf
254 
/**
 * @brief Error checking macro for CUDA runtime API calls.
 *
 * Evaluates `call` exactly once. If the result is not `cudaSuccess`, it is passed together with
 * the current file and line to `cudf::detail::throw_cuda_error`, which throws.
 *
 * The result is held in a deliberately unusual local name. With a generic name such as
 * `status`, a `call` expression that mentions a caller variable of the same name would bind to
 * the macro's own, not yet initialized, local instead.
 *
 * NOTE(review): the expansion ends with `while (0);`. The trailing semicolon is kept so that
 * call sites written without their own `;` keep compiling, but it makes
 * `if (c) CUDF_CUDA_TRY(x); else ...` ill-formed without braces. Consider removing it after
 * auditing call sites.
 *
 * @param call Expression yielding a `cudaError_t`
 */
#define CUDF_CUDA_TRY(call)                                                      \
  do {                                                                           \
    cudaError_t const cudf_cuda_try_status_ = (call);                            \
    if (cudaSuccess != cudf_cuda_try_status_) {                                  \
      cudf::detail::throw_cuda_error(cudf_cuda_try_status_, __FILE__, __LINE__); \
    }                                                                            \
  } while (0);
267 
/**
 * @brief Debug macro to check for CUDA errors.
 *
 * When `NDEBUG` is defined, only `cudaPeekAtLastError()` is checked and `stream` is not used.
 * Otherwise the macro first synchronizes `stream` with `cudaStreamSynchronize` and then checks
 * `cudaPeekAtLastError()`. Each call goes through `CUDF_CUDA_TRY`, so a result other than
 * `cudaSuccess` raises an exception.
 *
 * @param stream CUDA stream to synchronize when `NDEBUG` is not defined
 */
#ifdef NDEBUG
#define CUDF_CHECK_CUDA(stream) CUDF_CUDA_TRY(cudaPeekAtLastError());
#else
#define CUDF_CHECK_CUDA(stream)                   \
  do {                                            \
    CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); \
    CUDF_CUDA_TRY(cudaPeekAtLastError());         \
  } while (0);
#endif
cuDF interfaces
Definition: aggregation.hpp:34
Exception thrown when a CUDA error is encountered.
Definition: error.hpp:90
cuda_error(std::string const &message, cudaError_t const &error)
Construct a new cuda error object with error message and code.
Definition: error.hpp:97
cudaError_t _cudaError
CUDA error code.
Definition: error.hpp:111
cudaError_t error_code() const
Returns the CUDA error code associated with the exception.
Definition: error.hpp:108
Exception thrown when an operation is attempted on an unsupported dtype.
Definition: error.hpp:125
data_type_error(std::string const &message)
Construct a new data_type_error object with error message.
Definition: error.hpp:138
data_type_error(char const *const message)
Constructs a data_type_error with the error message.
Definition: error.hpp:131
Exception thrown when logical precondition is violated.
Definition: error.hpp:62
logic_error(char const *const message)
Constructs a logic_error with the error message.
Definition: error.hpp:68
logic_error(std::string const &message)
Construct a new logic error object with error message.
Definition: error.hpp:75
The struct to store the current stacktrace upon its construction.
Definition: error.hpp:37
std::string const _stacktrace
The whole stacktrace stored as one string.
Definition: error.hpp:53
char const * stacktrace() const
Get the stored stacktrace captured during object construction.
Definition: error.hpp:50