libcudf  23.12.00
error.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/detail/utilities/stacktrace.hpp>
20 
21 #include <cuda.h>
22 #include <cuda_runtime_api.h>
23 #include <stdexcept>
24 #include <string>
25 #include <type_traits>
26 
27 namespace cudf {
39  // Exclude the current stackframe, as it is this constructor.
40  : _stacktrace{cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO)}
41  {
42  }
43 
44  public:
50  char const* stacktrace() const { return _stacktrace.c_str(); }
51 
52  protected:
53  std::string const _stacktrace;
54 };
55 
62 struct logic_error : public std::logic_error, public stacktrace_recorder {
68  logic_error(char const* const message) : std::logic_error(message) {}
69 
75  logic_error(std::string const& message) : std::logic_error(message) {}
76 
77  // TODO Add an error code member? This would be useful for translating an
78  // exception to an error code in a pure-C API
79 };
84 struct cuda_error : public std::runtime_error, public stacktrace_recorder {
91  cuda_error(std::string const& message, cudaError_t const& error)
92  : std::runtime_error(message), _cudaError(error)
93  {
94  }
95 
96  public:
102  cudaError_t error_code() const { return _cudaError; }
103 
104  protected:
105  cudaError_t _cudaError;
106 };
107 
108 struct fatal_cuda_error : public cuda_error {
109  using cuda_error::cuda_error; // Inherit constructors
110 };
111 
119 struct data_type_error : public std::invalid_argument, public stacktrace_recorder {
125  data_type_error(char const* const message) : std::invalid_argument(message) {}
126 
132  data_type_error(std::string const& message) : std::invalid_argument(message) {}
133 };
136 } // namespace cudf
137 
// Two-step stringification: CUDF_STRINGIFY(x) macro-expands `x` first (so
// CUDF_STRINGIFY(__LINE__) yields the line number as a string literal), then
// STRINGIFY_DETAIL applies the `#` operator to the expanded tokens.
#define STRINGIFY_DETAIL(x) #x
#define CUDF_STRINGIFY(x)   STRINGIFY_DETAIL(x)

/**
 * @brief Checks a condition and throws an exception when it does not hold.
 *
 * Accepts two or three arguments and dispatches on the argument count:
 *  - `CUDF_EXPECTS(condition, reason)` throws `cudf::logic_error`
 *  - `CUDF_EXPECTS(condition, reason, exception_type)` throws `exception_type`
 *
 * `reason` must be a string literal: it is concatenated at compile time with
 * the "CUDF failure at: <file>:<line>: " prefix. `exception_type` must derive
 * from `std::exception` (enforced by a `static_assert`).
 *
 * Example:
 * @code
 * CUDF_EXPECTS(lhs.size() == rhs.size(), "Size mismatch.");
 * CUDF_EXPECTS(n >= 0, "n must be non-negative.", std::invalid_argument);
 * @endcode
 */
#define CUDF_EXPECTS(...)                                             \
  GET_CUDF_EXPECTS_MACRO(__VA_ARGS__, CUDF_EXPECTS_3, CUDF_EXPECTS_2) \
  (__VA_ARGS__)

// Selects the 4th argument: CUDF_EXPECTS_3 when three arguments were passed to
// CUDF_EXPECTS, CUDF_EXPECTS_2 when two were passed.
#define GET_CUDF_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME

// Three-argument form: caller chooses the exception type.
#define CUDF_EXPECTS_3(_condition, _reason, _exception_type)                    \
  do {                                                                          \
    static_assert(std::is_base_of_v<std::exception, _exception_type>);          \
    (_condition) ? static_cast<void>(0)                                         \
                 : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \
                   {"CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _reason}; \
  } while (0)

// Two-argument form: defaults the exception type to cudf::logic_error.
#define CUDF_EXPECTS_2(_condition, _reason) CUDF_EXPECTS_3(_condition, _reason, cudf::logic_error)

/**
 * @brief Unconditionally throws an exception; marks an erroneous code path.
 *
 * Accepts one or two arguments and dispatches on the argument count:
 *  - `CUDF_FAIL(what)` throws `cudf::logic_error`
 *  - `CUDF_FAIL(what, exception_type)` throws `exception_type`
 *
 * `what` must be a string literal: it is concatenated at compile time with the
 * "CUDF failure at: <file>:<line>: " prefix (the same prefix CUDF_EXPECTS uses).
 *
 * Example:
 * @code
 * CUDF_FAIL("Unsupported code path");
 * CUDF_FAIL("Bad argument", std::invalid_argument);
 * @endcode
 */
#define CUDF_FAIL(...)                                       \
  GET_CUDF_FAIL_MACRO(__VA_ARGS__, CUDF_FAIL_2, CUDF_FAIL_1) \
  (__VA_ARGS__)

// Selects the 3rd argument: CUDF_FAIL_2 when two arguments were passed to
// CUDF_FAIL, CUDF_FAIL_1 when one was passed.
#define GET_CUDF_FAIL_MACRO(_1, _2, NAME, ...) NAME

// Two-argument form: caller chooses the exception type.
// The prefix carries a space after "at:" so messages match CUDF_EXPECTS_3.
#define CUDF_FAIL_2(_what, _exception_type)      \
  /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \
  throw _exception_type { "CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _what }

// One-argument form: defaults the exception type to cudf::logic_error.
#define CUDF_FAIL_1(_what) CUDF_FAIL_2(_what, cudf::logic_error)
222 
224 
225 namespace cudf {
226 namespace detail {
227 // @cond
228 inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int line)
229 {
230  // Calls cudaGetLastError to clear the error status. It is nearly certain that a fatal error
231  // occurred if it still returns the same error after a cleanup.
232  cudaGetLastError();
233  auto const last = cudaFree(0);
234  auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
235  std::to_string(line) + ": " + std::to_string(error) + " " +
236  cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
237  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
238  // between two calls.
239  if (error == last && last == cudaDeviceSynchronize()) {
240  throw fatal_cuda_error{"Fatal " + msg, error};
241  } else {
242  throw cuda_error{msg, error};
243  }
244 }
245 // @endcond
246 } // namespace detail
247 } // namespace cudf
248 
/**
 * @brief Error checking macro for CUDA runtime API calls.
 *
 * Evaluates `call` exactly once. If the result is not `cudaSuccess`,
 * `cudf::detail::throw_cuda_error` is invoked with the returned status and the
 * file/line of the macro invocation, which throws a `cudf::cuda_error` (or
 * `cudf::fatal_cuda_error`).
 *
 * The macro-local variable uses a deliberately unusual name so that a `call`
 * expression mentioning a caller variable such as `status` is not captured by
 * the macro's own declaration.
 *
 * NOTE: the expansion itself ends with a semicolon (kept so existing call
 * sites are unaffected). As a consequence an unbraced
 * `if (c) CUDF_CUDA_TRY(x); else ...` does not compile; wrap the invocation in
 * braces in that situation.
 */
#define CUDF_CUDA_TRY(call)                                                       \
  do {                                                                            \
    cudaError_t const cudf_cuda_try_status_ = (call);                             \
    if (cudaSuccess != cudf_cuda_try_status_) {                                   \
      cudf::detail::throw_cuda_error(cudf_cuda_try_status_, __FILE__, __LINE__);  \
    }                                                                             \
  } while (0);
261 
/**
 * @brief Checks for a pending CUDA error via `CUDF_CUDA_TRY`.
 *
 * When `NDEBUG` is defined, only `cudaPeekAtLastError()` is checked and the
 * `stream` argument is unused. Otherwise `stream` is synchronized with
 * `cudaStreamSynchronize` first (and that call is checked too) before
 * `cudaPeekAtLastError()` is checked.
 *
 * Both expansions end with a semicolon of their own.
 */
#ifdef NDEBUG
#define CUDF_CHECK_CUDA(stream) CUDF_CUDA_TRY(cudaPeekAtLastError());
#else
#define CUDF_CHECK_CUDA(stream)                   \
  do {                                            \
    CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); \
    CUDF_CUDA_TRY(cudaPeekAtLastError());         \
  } while (0);
#endif
cuDF interfaces
Definition: aggregation.hpp:34
Exception thrown when a CUDA error is encountered.
Definition: error.hpp:84
cuda_error(std::string const &message, cudaError_t const &error)
Construct a new cuda error object with error message and code.
Definition: error.hpp:91
cudaError_t _cudaError
CUDA error code.
Definition: error.hpp:105
cudaError_t error_code() const
Returns the CUDA error code associated with the exception.
Definition: error.hpp:102
Exception thrown when an operation is attempted on an unsupported dtype.
Definition: error.hpp:119
data_type_error(std::string const &message)
Construct a new data_type_error object with error message.
Definition: error.hpp:132
data_type_error(char const *const message)
Constructs a data_type_error with the error message.
Definition: error.hpp:125
Exception thrown when logical precondition is violated.
Definition: error.hpp:62
logic_error(char const *const message)
Constructs a logic_error with the error message.
Definition: error.hpp:68
logic_error(std::string const &message)
Construct a new logic error object with error message.
Definition: error.hpp:75
The struct to store the current stacktrace upon its construction.
Definition: error.hpp:37
std::string const _stacktrace
The whole stacktrace stored as one string.
Definition: error.hpp:53
char const * stacktrace() const
Get the stored stacktrace captured during object construction.
Definition: error.hpp:50