
Commit 6321165

Merge branch 'develop' of github.com:greycooker/PaddleNLP into develop
merge
2 parents: 52cb70d + d89c011

9 files changed: +37 −11 lines

paddlenlp/peft/lora/lora_model.py

Lines changed: 1 addition & 2 deletions
@@ -195,8 +195,7 @@ def _merge_trainable_tensor_parallel(self, trainable_state_dict):
             if key in trainable_name_action_mappings:
                 ret = distributed_gather(tensor, group=mp_group, offload=True)
                 action = trainable_name_action_mappings[key]
-                is_collumn = self.lora_split_mapping[key]
-                if "_scale" in key and not is_collumn and is_dst:
+                if key in self.lora_split_mapping and not self.lora_split_mapping[key] and "_scale" in key and is_dst:
                     ret = paddle.to_tensor(ret)
                     tensor = paddle.max(ret, axis=0)
                 else:

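Why the condition was rewritten: indexing `self.lora_split_mapping[key]` directly raises a KeyError for state-dict keys that were never registered in the split mapping, while the membership-guarded form lets those keys fall through to the regular gather path. The sketch below is illustrative only, with a hypothetical mapping and key names; it is not the library code.

    # Illustrative sketch (hypothetical mapping/keys, not PaddleNLP internals).
    lora_split_mapping = {"layer.lora_A_scale": False}  # key -> is_column_split

    def needs_max_reduce(key, is_dst=True):
        # Old form: lora_split_mapping[key] raised KeyError for unmapped keys.
        # New form: unmapped keys simply skip the max-reduce branch.
        return key in lora_split_mapping and not lora_split_mapping[key] and "_scale" in key and is_dst

    print(needs_max_reduce("layer.lora_A_scale"))  # True: non-column-split scale, reduced with paddle.max
    print(needs_max_reduce("layer.weight"))        # False: not in the mapping, and no KeyError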
paddlenlp/transformers/model_utils.py

Lines changed: 16 additions & 1 deletion
@@ -41,7 +41,10 @@
 )
 from huggingface_hub.utils import EntryNotFoundError
 from paddle import Tensor
-from paddle.distributed.fleet.meta_parallel.parallel_layers import SharedLayerDesc
+from paddle.distributed.fleet.meta_parallel.parallel_layers import (
+    PipelineLayer,
+    SharedLayerDesc,
+)
 from paddle.nn import Embedding, Layer
 
 # TODO(fangzeyang) Temporary fix and replace by paddle framework downloader later

@@ -933,6 +936,18 @@ def _post_init(self, original_init, *args, **kwargs):
         ):
             self.init_weights()
 
+        # Note:
+        # 1. PipelineLayer will create parameters for each layer and
+        # call `_synchronize_shared_weights()` to synchronize the shared parameters.
+        # 2. When setting the model `state_dict`, `_synchronize_shared_weights` will be called to
+        # synchronize the shared parameters.
+        # However, `self._init_weights` will re-initialize the parameters without
+        # synchronizing the shared parameters. If the following step does not load a checkpoint,
+        # the shared parameters will be different.
+
+        if isinstance(self, PipelineLayer):
+            self._synchronize_shared_weights()
+
     def _init_weights(self, layer):
         """
         Initialize the weights. This method should be overridden by derived class.

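The note in the diff carries the whole rationale: `init_weights` can re-randomize parameters that a `PipelineLayer` shares across pipeline stages (registered via `SharedLayerDesc`), so without a follow-up sync the per-stage copies diverge unless a checkpoint later overwrites them. Below is a minimal sketch of the ordering the patch enforces, with a hypothetical wrapper function; `_synchronize_shared_weights()` is the Paddle `PipelineLayer` method the patch itself calls.

    # Sketch of the post-init ordering added by this change
    # (hypothetical wrapper, not the PaddleNLP implementation).
    from paddle.distributed.fleet.meta_parallel.parallel_layers import PipelineLayer

    def post_init_sketch(model):
        model.init_weights()                      # may re-initialize shared params independently per stage
        if isinstance(model, PipelineLayer):      # only pipeline-parallel models share weights this way
            model._synchronize_shared_weights()   # re-sync so every stage holds identical shared copies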
pipelines/pipelines/document_stores/faiss.py

Lines changed: 1 addition & 5 deletions
@@ -391,7 +391,7 @@ def update_embeddings(
 
                 vector_id_map = {}
                 for doc in document_batch:
-                    vector_id_map[str(doc.id)] = str(vector_id)
+                    vector_id_map[str(doc.id)] = str(vector_id) + "_" + index
                     vector_id += 1
                 self.update_vector_ids(vector_id_map, index=index)
                 progress_bar.set_description_str("Documents Processed")

@@ -443,7 +443,6 @@ def get_all_documents_generator(
         )
         if return_embedding is None:
             return_embedding = self.return_embedding
-
         for doc in documents:
             if return_embedding:
                 if doc.meta and doc.meta.get("vector_id") is not None:

@@ -588,7 +587,6 @@ def query_by_embedding(
 
         if filters:
             logger.warning("Query filters are not implemented for the FAISSDocumentStore.")
-
         index = index or self.index
         if not self.faiss_indexes.get(index):
             raise Exception(f"Index named '{index}' does not exists. Use 'update_embeddings()' to create an index.")

@@ -599,11 +597,9 @@
         query_emb = query_emb.reshape(1, -1).astype(np.float32)
         if self.similarity == "cosine":
             self.normalize_embedding(query_emb)
-
         score_matrix, vector_id_matrix = self.faiss_indexes[index].search(query_emb, top_k)
         vector_ids_for_query = [str(vector_id) + "_" + index for vector_id in vector_id_matrix[0] if vector_id != -1]
         documents = self.get_documents_by_vector_ids(vector_ids_for_query, index=index)
-
         # assign query score to each document
         scores_for_vector_ids: Dict[str, float] = {
             str(v_id): s for v_id, s in zip(vector_id_matrix[0], score_matrix[0])

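The one-line change in `update_embeddings` makes the stored vector ids use the same "<vector_id>_<index>" composite form that `query_by_embedding` already rebuilds from FAISS results (see the list comprehension in the last hunk), so lookups through `get_documents_by_vector_ids` resolve again. A small sketch of the convention, with example values only:

    # Example values only; the convention is "<faiss_vector_id>_<index_name>".
    index = "document"
    vector_id = 42
    stored_id = str(vector_id) + "_" + index    # written by update_embeddings after this patch
    queried_id = str(vector_id) + "_" + index   # rebuilt from FAISS search results in query_by_embedding
    assert stored_id == queried_id == "42_document"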
pipelines/pipelines/document_stores/sql.py

Lines changed: 0 additions & 3 deletions
@@ -216,15 +216,13 @@ def get_documents_by_vector_ids(
     ):
         """Fetch documents by specifying a list of text vector id strings"""
         index = index or self.index
-
         documents = []
         for i in range(0, len(vector_ids), batch_size):
             query = self.session.query(DocumentORM).filter(
                 DocumentORM.vector_id.in_(vector_ids[i : i + batch_size]), DocumentORM.index == index
             )
             for row in query.all():
                 documents.append(self._convert_sql_row_to_document(row))
-
         sorted_documents = sorted(documents, key=lambda doc: vector_ids.index(doc.meta["vector_id"]))
         return sorted_documents
 

@@ -405,7 +403,6 @@ def write_documents(
             document_objects = [Document.from_dict(d) if isinstance(d, dict) else d for d in documents]
         else:
             document_objects = documents
-
         document_objects = self._handle_duplicate_documents(
             documents=document_objects, index=index, duplicate_documents=duplicate_documents
         )

tests/transformers/load_subfolder/test_config.py

Lines changed: 4 additions & 0 deletions
@@ -15,9 +15,11 @@
 
 from paddlenlp.transformers import AutoConfig, BertConfig, CLIPConfig, T5Config
 from paddlenlp.utils.log import logger
+from tests.testing_utils import slow
 
 
 class ConfigLoadTester(unittest.TestCase):
+    @slow
     def test_bert_config_load(self):
         logger.info("Download Bert Config from PaddleNLP BOS")
         bert_config = BertConfig.from_pretrained("bert-base-uncased", from_hf_hub=False)

@@ -43,6 +45,7 @@ def test_bert_config_load(self):
         bert_config = BertConfig.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True)
         bert_config = AutoConfig.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True)
 
+    @slow
     def test_clip_config_load(self):
         logger.info("Download CLIP Config from PaddleNLP BOS")
         clip_config = CLIPConfig.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)

@@ -68,6 +71,7 @@ def test_clip_config_load(self):
         clip_config = CLIPConfig.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
         clip_config = AutoConfig.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
 
+    @slow
     def test_t5_config_load(self):
         logger.info("Download T5 Config from PaddleNLP BOS")
         t5_config = T5Config.from_pretrained("t5-small", from_hf_hub=False)

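This file and the remaining load_subfolder tests gain a `@slow` marker imported from `tests/testing_utils.py`, so the download-heavy cases only run when slow tests are explicitly enabled. The exact gating lives in that module; the sketch below shows a typical implementation of such a decorator and is an assumption, including the `RUN_SLOW_TEST` environment-variable name.

    # Hypothetical sketch of a `slow` marker (not the PaddleNLP definition;
    # the RUN_SLOW_TEST variable name is an assumption).
    import os
    import unittest

    def slow(test_case):
        """Skip the decorated test unless slow tests are explicitly enabled."""
        enabled = os.getenv("RUN_SLOW_TEST", "0").lower() in ("1", "true", "yes")
        return unittest.skipUnless(enabled, "test is slow; set RUN_SLOW_TEST=1 to run")(test_case)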
tests/transformers/load_subfolder/test_image_processor.py

Lines changed: 2 additions & 0 deletions
@@ -16,9 +16,11 @@
 
 from paddlenlp.transformers import AutoImageProcessor, CLIPImageProcessor
 from paddlenlp.utils.log import logger
+from tests.testing_utils import slow
 
 
 class ImageProcessorLoadTester(unittest.TestCase):
+    @slow
     def test_clip_load(self):
         logger.info("Download model from PaddleNLP BOS")
         clip_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)

tests/transformers/load_subfolder/test_model.py

Lines changed: 7 additions & 0 deletions
@@ -20,6 +20,7 @@
 
 from paddlenlp.transformers import AutoModel, BertModel, CLIPTextModel, T5Model
 from paddlenlp.utils.log import logger
+from tests.testing_utils import slow
 
 
 class ModelLoadTester(unittest.TestCase):

@@ -58,6 +59,7 @@ def test_cache_dir(
         else:
             assert any(".pdparams" in f for f in file_list), "*.pdparams not in cache_dir"
 
+    @slow
     def test_bert_load(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

@@ -194,6 +196,7 @@ def test_bert_load(self):
             use_safetensors=False,
         )
 
+    @slow
     def test_bert_load_safe(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

@@ -320,6 +323,7 @@ def test_bert_load_safe(self):
             use_safetensors=True,
         )
 
+    @slow
     def test_clip_load(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

@@ -466,6 +470,7 @@ def test_clip_load(self):
             use_safetensors=False,
         )
 
+    @slow
     def test_clip_load_safe(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

@@ -608,6 +613,7 @@ def test_clip_load_safe(self):
             use_safetensors=True,
         )
 
+    @slow
     def test_t5_load(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

@@ -726,6 +732,7 @@ def test_t5_load(self):
             use_safetensors=False,
         )
 
+    @slow
     def test_t5_load_safe(self):
         # BOS
         logger.info("Download model from PaddleNLP BOS")

tests/transformers/load_subfolder/test_processor.py

Lines changed: 2 additions & 0 deletions
@@ -17,9 +17,11 @@
 
 from paddlenlp.transformers import AutoProcessor, CLIPProcessor
 from paddlenlp.utils.log import logger
+from tests.testing_utils import slow
 
 
 class ProcessorLoadTester(unittest.TestCase):
+    @slow
     def test_clip_load(self):
         logger.info("Download model from PaddleNLP BOS")
         clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)

tests/transformers/load_subfolder/test_tokenizer.py

Lines changed: 4 additions & 0 deletions
@@ -22,9 +22,11 @@
     T5Tokenizer,
 )
 from paddlenlp.utils.log import logger
+from tests.testing_utils import slow
 
 
 class TokenizerLoadTester(unittest.TestCase):
+    @slow
     def test_bert_load(self):
         logger.info("Download model from PaddleNLP BOS")
         bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", from_hf_hub=False)

@@ -57,6 +59,7 @@ def test_bert_load(self):
             "aistudio/paddlenlp-test-model", subfolder="bert-base-uncased", from_aistudio=True
         )
 
+    @slow
     def test_clip_load(self):
         logger.info("Download model from PaddleNLP BOS")
         clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)

@@ -89,6 +92,7 @@ def test_clip_load(self):
             "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
         )
 
+    @slow
     def test_t5_load(self):
         logger.info("Download model from PaddleNLP BOS")
         t5_tokenizer = T5Tokenizer.from_pretrained("t5-small", from_hf_hub=False)
