2121#include " paddle/phi/backends/cpu/cpu_context.h"
2222#include " paddle/phi/core/kernel_registry.h"
2323
24- #include " paddle/fluid/framework/generator.h"
25-
2624namespace phi {
2725
26+ // reference: https://gist.github.com/lakshayg/d80172fe5ae3c5d2c2aedb53c250320e
// Inverse error function: returns y such that erf(y) == x.
//
// Evaluates one of three rational approximations (numerator / denominator
// polynomials of degree 7, in Horner form), chosen by the magnitude of x:
//   * |x| <= 0.85                       : central region, polynomial in
//                                         r = 0.180625 - 0.25 * x * x
//   * sqrt(ln2 - ln(1 - |x|)) <= 5      : intermediate tail
//   * otherwise                         : far tail
//
// Special cases:
//   * x < -1 or x > 1  -> quiet NaN
//   * x == 1           -> +infinity
//   * x == -1          -> -infinity
//
// The math functions are qualified with std:: on purpose. An unqualified
// abs() on a floating-point argument may resolve to the C `int abs(int)`,
// which truncates |x| to 0 and silently sends tail inputs through the
// central-region formula.
template <typename T>
T Erfinv(T x) {
  if (x < -1 || x > 1) {
    return std::numeric_limits<T>::quiet_NaN();
  } else if (x == 1.0) {
    return std::numeric_limits<T>::infinity();
  } else if (x == -1.0) {
    return -std::numeric_limits<T>::infinity();
  }

  const T LN2 = 6.931471805599453094172321214581e-1;

  // Central region: numerator (A*) and denominator (B*) coefficients.
  const T A0 = 1.1975323115670912564578e0;
  const T A1 = 4.7072688112383978012285e1;
  const T A2 = 6.9706266534389598238465e2;
  const T A3 = 4.8548868893843886794648e3;
  const T A4 = 1.6235862515167575384252e4;
  const T A5 = 2.3782041382114385731252e4;
  const T A6 = 1.1819493347062294404278e4;
  const T A7 = 8.8709406962545514830200e2;

  const T B0 = 1.0000000000000000000e0;
  const T B1 = 4.2313330701600911252e1;
  const T B2 = 6.8718700749205790830e2;
  const T B3 = 5.3941960214247511077e3;
  const T B4 = 2.1213794301586595867e4;
  const T B5 = 3.9307895800092710610e4;
  const T B6 = 2.8729085735721942674e4;
  const T B7 = 5.2264952788528545610e3;

  // Intermediate tail: numerator (C*) and denominator (D*) coefficients.
  const T C0 = 1.42343711074968357734e0;
  const T C1 = 4.63033784615654529590e0;
  const T C2 = 5.76949722146069140550e0;
  const T C3 = 3.64784832476320460504e0;
  const T C4 = 1.27045825245236838258e0;
  const T C5 = 2.41780725177450611770e-1;
  const T C6 = 2.27238449892691845833e-2;
  const T C7 = 7.74545014278341407640e-4;

  const T D0 = 1.4142135623730950488016887e0;
  const T D1 = 2.9036514445419946173133295e0;
  const T D2 = 2.3707661626024532365971225e0;
  const T D3 = 9.7547832001787427186894837e-1;
  const T D4 = 2.0945065210512749128288442e-1;
  const T D5 = 2.1494160384252876777097297e-2;
  const T D6 = 7.7441459065157709165577218e-4;
  const T D7 = 1.4859850019840355905497876e-9;

  // Far tail: numerator (E*) and denominator (F*) coefficients.
  const T E0 = 6.65790464350110377720e0;
  const T E1 = 5.46378491116411436990e0;
  const T E2 = 1.78482653991729133580e0;
  const T E3 = 2.96560571828504891230e-1;
  const T E4 = 2.65321895265761230930e-2;
  const T E5 = 1.24266094738807843860e-3;
  const T E6 = 2.71155556874348757815e-5;
  const T E7 = 2.01033439929228813265e-7;

  const T F0 = 1.414213562373095048801689e0;
  const T F1 = 8.482908416595164588112026e-1;
  const T F2 = 1.936480946950659106176712e-1;
  const T F3 = 2.103693768272068968719679e-2;
  const T F4 = 1.112800997078859844711555e-3;
  const T F5 = 2.611088405080593625138020e-5;
  const T F6 = 2.010321207683943062279931e-7;
  const T F7 = 2.891024605872965461538222e-15;

  // std::abs selects the floating-point overload for T; see header comment.
  T abs_x = std::abs(x);

  if (abs_x <= 0.85) {
    T r = 0.180625 - 0.25 * x * x;
    T num =
        (((((((A7 * r + A6) * r + A5) * r + A4) * r + A3) * r + A2) * r + A1) *
             r +
         A0);
    T den =
        (((((((B7 * r + B6) * r + B5) * r + B4) * r + B3) * r + B2) * r + B1) *
             r +
         B0);
    // The central approximation is odd in x: the sign comes from this factor.
    return x * num / den;
  }

  // Tail regions work on |x|; the sign is restored at the end.
  T r = std::sqrt(LN2 - std::log(1.0 - abs_x));

  T num, den;
  if (r <= 5.0) {
    r = r - 1.6;
    num =
        (((((((C7 * r + C6) * r + C5) * r + C4) * r + C3) * r + C2) * r + C1) *
             r +
         C0);
    den =
        (((((((D7 * r + D6) * r + D5) * r + D4) * r + D3) * r + D2) * r + D1) *
             r +
         D0);
  } else {
    r = r - 5.0;
    num =
        (((((((E7 * r + E6) * r + E5) * r + E4) * r + E3) * r + E2) * r + E1) *
             r +
         E0);
    den =
        (((((((F7 * r + F6) * r + F5) * r + F4) * r + F3) * r + F2) * r + F1) *
             r +
         F0);
  }

  if (x < 0) {
    return -num / den;
  } else {
    return num / den;
  }
}
139+
// Functor that maps a value `u` to
//   mean + std * sqrt(2) * Erfinv(2 * p - 1),
// where p = Phi(-2) + (Phi(2) - Phi(-2)) * u and Phi is the standard normal
// CDF. For u in [0, 1] this keeps p inside [Phi(-2), Phi(2)], i.e. the result
// stays within two standard deviations of the mean.
//
// Note: the member `std` shares its spelling with namespace std. Qualified
// names such as std::sqrt still resolve to the namespace because the name in
// front of `::` is never looked up as a variable.
template <typename T>
struct TruncatedNormal {
  T mean, std;
  T a_normal_cdf;  // standard normal CDF at the lower cut (-2)
  T b_normal_cdf;  // standard normal CDF at the upper cut (+2)

  // Stores the distribution parameters and precomputes the CDF values at the
  // two truncation points.
  TruncatedNormal(T mean, T std) : mean(mean), std(std) {
    const auto standard_cdf = [](T z) {
      const auto scaled = z / std::sqrt(2.0);
      return (1.0 + std::erf(scaled)) / 2.0;
    };
    a_normal_cdf = standard_cdf(-2.0);
    b_normal_cdf = standard_cdf(2.0);
  }

  // Converts `value` into a sample via the inverse CDF restricted to the
  // truncated range.
  T operator()(T value) const {
    const auto cdf_span = b_normal_cdf - a_normal_cdf;
    auto p = a_normal_cdf + cdf_span * value;
    const auto unit_sample = std::sqrt(2.0) * Erfinv(2 * p - 1);
    return unit_sample * std + mean;
  }
};
158+
28159template <typename T, typename Context>
29160void TruncatedGaussianRandomKernel (const Context& dev_ctx,
30161 const std::vector<int >& shape,
@@ -42,7 +173,13 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
42173 TruncatedNormal<T> truncated_normal (mean, std);
43174 int64_t size = tensor->numel ();
44175
45- auto engine = paddle::framework::GetCPURandomEngine (seed);
176+ std::shared_ptr<std::mt19937_64> engine;
177+ if (seed) {
178+ engine = std::make_shared<std::mt19937_64>();
179+ engine->seed (seed);
180+ } else {
181+ engine = dev_ctx.GetGenerator ()->GetCPUEngine ();
182+ }
46183 for (int64_t i = 0 ; i < size; ++i) {
47184 data[i] = truncated_normal (dist (*engine));
48185 }
0 commit comments