|
47 | 47 | 'generate_proposal_labels', |
48 | 48 | 'generate_proposals', |
49 | 49 | 'generate_mask_labels', |
50 | | - 'box_coder', |
51 | | - 'polygon_box_transform', |
52 | 50 | 'box_clip', |
53 | 51 | 'multiclass_nms', |
54 | 52 | 'locality_aware_nms', |
|
60 | 58 | ] |
61 | 59 |
|
62 | 60 |
|
63 | | -@templatedoc() |
64 | | -def box_coder( |
65 | | - prior_box, |
66 | | - prior_box_var, |
67 | | - target_box, |
68 | | - code_type="encode_center_size", |
69 | | - box_normalized=True, |
70 | | - name=None, |
71 | | - axis=0, |
72 | | -): |
73 | | - r""" |
74 | | -
|
75 | | - **Box Coder Layer** |
76 | | -
|
77 | | - Encode/Decode the target bounding box with the priorbox information. |
78 | | -
|
79 | | - The Encoding schema described below: |
80 | | -
|
81 | | - .. math:: |
82 | | -
|
83 | | - ox = (tx - px) / pw / pxv |
84 | | -
|
85 | | - oy = (ty - py) / ph / pyv |
86 | | -
|
87 | | - ow = \log(\lvert tw / pw \rvert) / pwv |
88 | | -
|
88 | | - oh = \log(\lvert th / ph \rvert) / phv |
90 | | -
|
91 | | - The Decoding schema described below: |
92 | | -
|
93 | | - .. math:: |
94 | | -
|
95 | | - ox = (pw * pxv * tx + px) - tw / 2 |
96 | | -
|
97 | | - oy = (ph * pyv * ty + py) - th / 2 |
98 | | -
|
99 | | - ow = \exp(pwv * tw) * pw + tw / 2 |
100 | | -
|
101 | | - oh = \exp(phv * th) * ph + th / 2 |
102 | | -
|
103 | | - where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, |
104 | | - width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote |
105 | | - the priorbox's (anchor) center coordinates, width and height. `pxv`, |
106 | | - `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`, |
107 | | - `ow`, `oh` denote the encoded/decoded coordinates, width and height. |
108 | | -
|
109 | | - During Box Decoding, two modes for broadcast are supported. Say target |
110 | | - box has shape [N, M, 4], and the shape of prior box can be [N, 4] or |
111 | | - [M, 4]. Then prior box will broadcast to target box along the |
112 | | - assigned axis. |
113 | | -
|
114 | | - Args: |
115 | | - prior_box(Variable): Box list prior_box is a 2-D Tensor with shape |
116 | | - [M, 4] holds M boxes and data type is float32 or float64. Each box |
117 | | - is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the |
118 | | - left top coordinate of the anchor box, if the input is image feature |
119 | | - map, they are close to the origin of the coordinate system. |
120 | | - [xmax, ymax] is the right bottom coordinate of the anchor box. |
121 | | - prior_box_var(List|Variable|None): prior_box_var supports three types |
122 | | - of input. One is a variable with shape [M, 4] which holds M groups and |
123 | | - data type is float32 or float64. The second is a list consisting of |
124 | | - 4 elements shared by all boxes and data type is float32 or float64. |
125 | | - The third is None, in which case it is not involved in the calculation. |
126 | | - target_box(Variable): This input can be a 2-D LoDTensor with shape |
127 | | - [N, 4] when code_type is 'encode_center_size'. This input also can |
128 | | - be a 3-D Tensor with shape [N, M, 4] when code_type is |
129 | | - 'decode_center_size'. Each box is represented as |
130 | | - [xmin, ymin, xmax, ymax]. The data type is float32 or float64. |
131 | | - This tensor can contain LoD information to represent a batch of inputs. |
132 | | - code_type(str): The code type used with the target box. It can be |
133 | | - `encode_center_size` or `decode_center_size`. `encode_center_size` |
134 | | - by default. |
135 | | - box_normalized(bool): Whether to treat the priorbox as a normalized box. |
136 | | - Set true by default. |
137 | | - name(str, optional): For detailed information, please refer |
138 | | - to :ref:`api_guide_Name`. Usually name does not need to be set and is |
139 | | - None by default. |
140 | | - axis(int): Which axis in PriorBox to broadcast for box decode, |
141 | | - for example, if axis is 0 and TargetBox has shape [N, M, 4] and |
142 | | - PriorBox has shape [M, 4], then PriorBox will broadcast to [N, M, 4] |
143 | | - for decoding. It is only valid when code type is |
144 | | - `decode_center_size`. Set 0 by default. |
145 | | -
|
146 | | - Returns: |
147 | | - Variable: |
148 | | -
|
149 | | - output_box(Variable): When code_type is 'encode_center_size', the |
150 | | - output tensor of box_coder_op with shape [N, M, 4] representing the |
151 | | - result of N target boxes encoded with M Prior boxes and variances. |
152 | | - When code_type is 'decode_center_size', N represents the batch size |
153 | | - and M represents the number of decoded boxes. |
154 | | -
|
155 | | - Examples: |
156 | | -
|
157 | | - .. code-block:: python |
158 | | -
|
159 | | - import paddle.fluid as fluid |
160 | | - import paddle |
161 | | - paddle.enable_static() |
162 | | - # For encode |
163 | | - prior_box_encode = fluid.data(name='prior_box_encode', |
164 | | - shape=[512, 4], |
165 | | - dtype='float32') |
166 | | - target_box_encode = fluid.data(name='target_box_encode', |
167 | | - shape=[81, 4], |
168 | | - dtype='float32') |
169 | | - output_encode = fluid.layers.box_coder(prior_box=prior_box_encode, |
170 | | - prior_box_var=[0.1,0.1,0.2,0.2], |
171 | | - target_box=target_box_encode, |
172 | | - code_type="encode_center_size") |
173 | | - # For decode |
174 | | - prior_box_decode = fluid.data(name='prior_box_decode', |
175 | | - shape=[512, 4], |
176 | | - dtype='float32') |
177 | | - target_box_decode = fluid.data(name='target_box_decode', |
178 | | - shape=[512, 81, 4], |
179 | | - dtype='float32') |
180 | | - output_decode = fluid.layers.box_coder(prior_box=prior_box_decode, |
181 | | - prior_box_var=[0.1,0.1,0.2,0.2], |
182 | | - target_box=target_box_decode, |
183 | | - code_type="decode_center_size", |
184 | | - box_normalized=False, |
185 | | - axis=1) |
186 | | - """ |
187 | | - return paddle.vision.ops.box_coder( |
188 | | - prior_box=prior_box, |
189 | | - prior_box_var=prior_box_var, |
190 | | - target_box=target_box, |
191 | | - code_type=code_type, |
192 | | - box_normalized=box_normalized, |
193 | | - axis=axis, |
194 | | - name=name, |
195 | | - ) |
196 | | - |
197 | | - |
198 | | -@templatedoc() |
199 | | -def polygon_box_transform(input, name=None): |
200 | | - """ |
201 | | - ${comment} |
202 | | -
|
203 | | - Args: |
204 | | - input(Variable): The input with shape [batch_size, geometry_channels, height, width]. |
205 | | - A Tensor with type float32, float64. |
206 | | - name(str, Optional): For details, please refer to :ref:`api_guide_Name`. |
207 | | - Generally, no setting is required. Default: None. |
208 | | -
|
209 | | - Returns: |
210 | | - Variable: The output with the same shape as input. A Tensor with type float32, float64. |
211 | | -
|
212 | | - Examples: |
213 | | - .. code-block:: python |
214 | | -
|
215 | | - import paddle.fluid as fluid |
216 | | - input = fluid.data(name='input', shape=[4, 10, 5, 5], dtype='float32') |
217 | | - out = fluid.layers.polygon_box_transform(input) |
218 | | - """ |
219 | | - check_variable_and_dtype( |
220 | | - input, "input", ['float32', 'float64'], 'polygon_box_transform' |
221 | | - ) |
222 | | - helper = LayerHelper("polygon_box_transform", **locals()) |
223 | | - output = helper.create_variable_for_type_inference(dtype=input.dtype) |
224 | | - |
225 | | - helper.append_op( |
226 | | - type="polygon_box_transform", |
227 | | - inputs={"Input": input}, |
228 | | - attrs={}, |
229 | | - outputs={"Output": output}, |
230 | | - ) |
231 | | - return output |
232 | | - |
233 | | - |
234 | 61 | def prior_box( |
235 | 62 | input, |
236 | 63 | image, |
|
0 commit comments