Commit 341e4c0

Merge branch 'develop' into t5
2 parents 413bc3a + fcfe692 · commit 341e4c0

34 files changed · +2162 −1840 lines changed

applications/document_intelligence/doc_vqa/Extraction/run_docvqa.py

Lines changed: 8 additions & 15 deletions
@@ -12,25 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import sys
-import copy
+import argparse
 import json
-import random
 import logging
+import os
+import random
 import warnings
-import argparse
-import numpy as np
-from collections import OrderedDict, Counter
+from collections import Counter
 
+import numpy as np
 import paddle
-from paddle.static import InputSpec
-from paddle.jit import to_static
-from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer
-
 from docvqa import DocVQA
 from model import LayoutXLMForTokenClassification_with_CRF
 
+from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer
+
 warnings.filterwarnings("ignore")
 logger = logging.getLogger(__name__)
 
@@ -166,7 +162,6 @@ def main(args):
     global_step = 0
     tr_loss = 0.0
     set_seed(args)
-    best_metrics = None
     for epoch_id in range(args.num_train_epochs):
        print("epoch id:{}".format(epoch_id))
        for step, batch in enumerate(train_dataloader):
@@ -193,7 +188,7 @@ def main(args):
                     step,
                     len(train_dataloader),
                     lr_scheduler.get_lr(),
-                    loss.numpy()[0],
+                    float(loss),
                 )
             )
 
@@ -322,7 +317,6 @@ def _normalize(in_str):
 
 
 def calc_f1_score(answer, prediction):
-    f1_scores = []
     ans_segs = _tokenize_chinese_chars(_normalize(answer))
     prediction_segs = _tokenize_chinese_chars(_normalize(prediction))
     f1 = fast_f1(prediction_segs, ans_segs)
@@ -436,7 +430,6 @@ def evaluate(args, model, tokenizer, label2id_map, id2label_map, pad_token_label
         line_json["question"] = line_label["question"]
         line_json["label_answer"] = line_text[1]
         line_json["predict_answer"] = line_text[2]
-        all_boxes = line_res[3]
         label_bbox_index, predict_bbox_index = line_text[3], line_text[4]
         label_bboxes, predict_bboxes = [], []
         for i in range(len(line_label["bboxes"])):
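
Note on the import hunk at the top of this file (and the similar import hunks in the files below): the imports are reordered into isort-style groups — standard library first, then third-party packages and local example modules, with paddlenlp itself placed last as the repository's first-party package — alphabetized within each group and separated by blank lines, with overlong imports wrapped into a parenthesized one-name-per-line form. The sketch below restates that apparent convention using this file's own imports; the formatter and its configuration are an assumption on my part, not something shown in this commit.

# Sketch of the apparent grouping (assumed isort-like tooling; config not part of this diff)
import json          # 1) standard library, alphabetized
import logging

import numpy as np   # 2) third-party packages and local example modules
import paddle
from docvqa import DocVQA

from paddlenlp.transformers import LayoutXLMTokenizer   # 3) paddlenlp, treated as first-party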

applications/sentiment_analysis/ASO_analysis/demo.py

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ def predict(args, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label):
         token_type_ids = paddle.to_tensor([encoded_inputs["token_type_ids"]])
 
         logits = cls_model(input_ids, token_type_ids=token_type_ids)
-        prediction = logits.argmax(axis=1).numpy()[0]
+        prediction = int(logits.argmax(axis=1))
 
         result = {"aspect": aspect, "opinions": opinion_words, "sentiment_polarity": cls_id2label[prediction]}
         results.append(result)
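
Note: the recurring change in this commit from tensor.numpy()[0] to float(tensor) or int(tensor) is most likely a migration for 0-D tensor support: .numpy()[0] assumes the value is a 1-element 1-D array, which breaks once reductions and argmax return 0-D tensors, while float()/int() extract a Python scalar in either case. A minimal sketch of the pattern, assuming a Paddle build is available (the motivating Paddle version is not stated in the commit):

import paddle

loss = paddle.mean(paddle.to_tensor([1.0, 2.0, 3.0]))  # 0-D on newer Paddle, shape [1] historically
print(float(loss))                                      # robust scalar extraction either way
count = paddle.sum(paddle.to_tensor([1, 0, 1]))
print(int(count))                                       # same idea for integer scalars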

examples/dialogue/lic2021_baseline/finetune.py

Lines changed: 10 additions & 7 deletions
@@ -12,21 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import math
 import os
 import time
-import math
+
 import paddle
 import paddle.distributed as dist
 import paddle.nn as nn
 import paddle.nn.functional as F
+from args import parse_args, print_args
+from data import DialogueDataset
 from paddle.io import DataLoader
-from paddle.optimizer.lr import NoamDecay
 from paddle.optimizer import AdamW
+from paddle.optimizer.lr import NoamDecay
 
-from paddlenlp.transformers import UnifiedTransformerLMHeadModel, UnifiedTransformerTokenizer
-
-from args import parse_args, print_args
-from data import DialogueDataset
+from paddlenlp.transformers import (
+    UnifiedTransformerLMHeadModel,
+    UnifiedTransformerTokenizer,
+)
 
 
 def save_ckpt(model, tokenizer, save_dir, name):
@@ -129,7 +132,7 @@ def evaluation(model, data_loader):
         logits = model(token_ids, type_ids, pos_ids, generation_mask, tgt_pos)
         loss = F.cross_entropy(logits, tgt_label, reduction="sum")
 
-        total_loss += loss.numpy()[0]
+        total_loss += float(loss.numpy())
         total_tokens += tgt_label.shape[0]
 
     avg_loss = total_loss / total_tokens

examples/language_model/elmo/run_eval.py

Lines changed: 6 additions & 9 deletions
@@ -12,17 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import time
 import math
-import paddle
-import paddle.nn as nn
-from paddle.io import DataLoader
-import paddle.distributed as dist
+import time
 
+import paddle
 from args import parse_args, print_args
+from dataset import OneBillionWordDataset, load_vocab
 from elmo import ELMo, ELMoLoss
-from dataset import load_vocab, OneBillionWordDataset
+from paddle.io import DataLoader
 
 
 @paddle.no_grad()
@@ -67,14 +64,14 @@ def eval(args):
         loss = elmo_loss(outputs, [next_ids, next_ids_reverse])
         ppl = paddle.exp(loss)
 
-        total_loss += loss.numpy()[0]
+        total_loss += float(loss)
         total_step += 1
 
         total_time += time.time() - batch_start_time
         if step % args.log_freq == 0:
             print(
                 "Eval step %d - loss: %.4f - Perplexity: %.4f - %.3fs/step"
-                % (step, loss.numpy()[0] * args.unroll_steps, ppl.numpy()[0], total_time / args.log_freq)
+                % (step, float(loss) * args.unroll_steps, float(ppl), total_time / args.log_freq)
             )
             total_time = 0.0
         batch_start_time = time.time()

examples/language_model/elmo/run_finetune.py

Lines changed: 9 additions & 12 deletions
@@ -12,21 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle.io import Dataset, DataLoader
-import paddle.distributed as dist
-
+import argparse
 import os
 import re
-import argparse
-import numpy as np
-from typing import List
-from sklearn.model_selection import train_test_split
 
+import numpy as np
+import paddle
+import paddle.distributed as dist
+import paddle.nn as nn
 from dataset import load_vocab
 from elmo import get_elmo_layer
+from paddle.io import DataLoader, Dataset
+from sklearn.model_selection import train_test_split
 
 
 # yapf: disable
@@ -249,7 +246,7 @@ def finetune(args):
             adam.clear_grad()
 
             if step % args.logging_step == 0:
-                print("step {}, loss {}".format(step, loss.numpy()[0]))
+                print("step {}, loss {}".format(step, float(loss)))
 
         acc = test(model, test_loader)
         print("\ntest acc {}\n".format(acc))
@@ -268,7 +265,7 @@ def test(model, test_loader):
         num += label.shape[0]
         predict = paddle.argmax(output, axis=1)
         label = paddle.cast(label, dtype=predict.dtype)
-        correct += paddle.sum(paddle.cast(predict == label, dtype="int64")).numpy()[0]
+        correct += int(paddle.sum(paddle.cast(predict == label, dtype="int64")))
     model.train()
     return correct * 1.0 / num
 

examples/language_model/elmo/run_pretrain.py

Lines changed: 5 additions & 5 deletions
@@ -14,14 +14,14 @@
 
 import os
 import time
+
 import paddle
-import paddle.nn as nn
-from paddle.io import DataLoader
 import paddle.distributed as dist
-
+import paddle.nn as nn
 from args import parse_args, print_args
+from dataset import OneBillionWordDataset, load_vocab
 from elmo import ELMo, ELMoLoss
-from dataset import load_vocab, OneBillionWordDataset
+from paddle.io import DataLoader
 
 
 def save_params(elmo, optimizer, save_dir, name):
@@ -104,7 +104,7 @@ def train(args):
             if step % args.log_freq == 0:
                 print(
                     "step %d/%d - loss: %.4f - Perplexity: %.4f - %.3fs/step"
-                    % (step, n_steps_total, loss.numpy()[0], ppl.numpy()[0], total_time / args.log_freq)
+                    % (step, n_steps_total, float(loss), float(ppl), total_time / args.log_freq)
                 )
                 total_time = 0.0
             if rank == 0 and step % args.save_freq == 0:

examples/language_model/elmo/word2vec_base.py

Lines changed: 9 additions & 10 deletions
@@ -12,18 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle.io import Dataset, DataLoader
-import paddle.distributed as dist
-
+import argparse
 import os
 import re
-import argparse
+
 import numpy as np
-from sklearn.model_selection import train_test_split
+import paddle
+import paddle.distributed as dist
+import paddle.nn as nn
 from gensim.models.keyedvectors import KeyedVectors
+from paddle.io import DataLoader, Dataset
+from sklearn.model_selection import train_test_split
 
 
 # yapf: disable
@@ -227,7 +226,7 @@ def train(args):
            adam.clear_grad()
 
            if step % args.logging_step == 0:
-                print("step %d, loss %.4f" % (step, loss.numpy()[0]))
+                print("step %d, loss %.4f" % (step, float(loss)))
 
        acc = test(model, test_loader)
        print("\ntest acc %.4f\n" % acc)
@@ -246,7 +245,7 @@ def test(model, test_loader):
         num += label.shape[0]
         predict = paddle.argmax(output, axis=1)
         label = paddle.cast(label, dtype=predict.dtype)
-        correct += paddle.sum(paddle.cast(predict == label, dtype="int64")).numpy()[0]
+        correct += int(paddle.sum(paddle.cast(predict == label, dtype="int64")))
     model.train()
     return correct * 1.0 / num
 

examples/language_model/moe/dygraph/run_moe_pretrain.py

Lines changed: 26 additions & 21 deletions
@@ -12,37 +12,42 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import argparse
-import math
 import os
 import random
 import time
+import types
+from types import MethodType
 
+import lr
 import numpy as np
 import paddle
-from visualdl import LogWriter
-from modeling import GPTModel, GPTForPretraining, GPTPretrainingCriterion, GPTForPretrainingPipe
-from paddlenlp.transformers import GPTTokenizer, GPTChineseTokenizer
-from paddlenlp.utils.log import logger
-
-from dataset import create_pretrained_dataset
+import paddle.distributed as dist
 from args import parse_args
-import lr
+from checkpointing import load_checkpoint, save_checkpoint
+from dataset import create_pretrained_dataset
+from framework import AdamW, group_sharded_parallel, obtain_storage
+from modeling import (
+    GPTForPretraining,
+    GPTForPretrainingPipe,
+    GPTModel,
+    GPTPretrainingCriterion,
+)
+from paddle import _legacy_C_ops
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
-from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
-import types
-from utils import get_timers, set_timers
-from types import MethodType
-from paddle import _legacy_C_ops
+from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_utils import (
+    GroupShardedScaler,
+)
+from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import (
+    ShardingScaler,
+)
 from paddle.fluid.framework import core, in_dygraph_mode
-import paddle.distributed as dist
-from framework import assign_group_by_size, flatten_dense_tensors, obtain_storage, AdamW, group_sharded_parallel
 from paddle.incubate.distributed.models import moe
-from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ShardingScaler
-from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_utils import GroupShardedScaler
+from utils import get_timers, set_timers
+from visualdl import LogWriter
 
-from checkpointing import save_checkpoint, load_checkpoint
+from paddlenlp.transformers import GPTChineseTokenizer, GPTTokenizer
+from paddlenlp.utils.log import logger
 
 MODEL_CLASSES = {
     "gpt": (GPTForPretraining, GPTTokenizer),
@@ -172,7 +177,7 @@ def unscale_method(self, optimizer):
     if dist.get_world_size() > 1:
         is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32")
         paddle.distributed.all_reduce(is_found_inf, op=paddle.distributed.ReduceOp.MAX, group=None)
-        self._found_inf = is_found_inf.numpy()[0]
+        self._found_inf = int(is_found_inf)
 
 
 def all_reduce_parameters(params, group):
@@ -437,7 +442,7 @@ def do_train(args):
 
     clip = None
     if args.grad_clip > 0:
-        is_expert_param_fun = lambda param: param.name in expert_fusion_names
+        is_expert_param_fun = lambda param: param.name in expert_fusion_names  # noqa: E731
         clip = moe.ClipGradByGlobalNorm(
             clip_norm=args.grad_clip,
             is_expert_param_func=is_expert_param_fun,
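
Note: E731 is the flake8 rule that flags assigning a lambda expression to a name; the commit keeps the lambda and silences the warning with a trailing # noqa: E731 comment rather than rewriting it. For reference only, a def-based equivalent (a sketch, not what the commit does; expert_fusion_names below is a placeholder for the set built earlier in the script) would look like:

expert_fusion_names = set()  # placeholder for the expert-parameter names collected by the script

def is_expert_param_fun(param):
    # Equivalent to: lambda param: param.name in expert_fusion_names
    return param.name in expert_fusion_names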
