This repository was archived by the owner on Sep 25, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9.7k
This repository was archived by the owner on Sep 25, 2025. It is now read-only.
Error: Trying to access flag --preserve_unused_tokens before flags were parsed #1133
Copy link
Copy link
Open
Description
I had been using the following code without problems until this morning, when I got an error calling bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer).
Please let me know how to fix it.
import sys

import pandas as pd
import tensorflow as tf  # NOTE(review): required below (tf.Graph/tf.Session); was missing from the paste
import tensorflow_hub as hub  # NOTE(review): required below (hub.Module); was missing from the paste
from absl import flags

import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
from tensorflow.contrib import predictor
import pkg_resources

# Fix for "UnparsedFlagAccessError: Trying to access flag --preserve_unused_tokens
# before flags were parsed": newer bert-tensorflow reads absl flags inside
# tokenizer.tokenize(), so the flags must be parsed (or marked parsed) first.
flags.FLAGS.mark_as_parsed()

# Confirm which bert-tensorflow release is installed (the flag access was
# introduced in a recent release, which is why this "suddenly" broke).
pkg_resources.get_distribution("bert-tensorflow").version

input_words = "Hello"
DATA_COLUMN = "message"
LABEL_COLUMN = "category_label"
# Single-row frame holding the text to classify plus a dummy label.
test = pd.DataFrame({DATA_COLUMN: [input_words], LABEL_COLUMN: [0]})
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Build a FullTokenizer from the vocab/casing info published by the Hub module.

    Loads BERT_MODEL_HUB in a throwaway graph, runs its "tokenization_info"
    signature in a TF1 session to fetch the vocab file path and the
    lower-casing flag, then constructs the matching bert FullTokenizer.

    Returns:
        bert.tokenization.FullTokenizer configured for the Hub module.
    """
    # NOTE(review): the pasted snippet lost all indentation; structure
    # restored to the canonical bert-tensorflow example.
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run(
                [tokenization_info["vocab_file"],
                 tokenization_info["do_lower_case"]])
    # Tokenizer construction needs no graph/session, so it sits outside the
    # `with` blocks; the fetched values are plain Python objects by now.
    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
tokenizer = create_tokenizer_from_hub_module()

# Wrap each row of the test frame as a bert InputExample. For single-sentence
# inference, guid and text_b are not needed.
test_InputExamples = test.apply(
    lambda row: bert.run_classifier.InputExample(
        guid=None,
        text_a=row[DATA_COLUMN],
        text_b=None,
        label=row[LABEL_COLUMN]),
    axis=1)

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
label_list = [6, 1, 2, 4, 3, 5, 0]

# Convert our test features to InputFeatures that BERT understands.
test_features = bert.run_classifier.convert_examples_to_features(
    test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
Error:
INFO:tensorflow:Writing example 0 of 1
INFO:tensorflow:Writing example 0 of 1
UnparsedFlagAccessError: Trying to access flag --preserve_unused_tokens before flags were parsed.
---------------------------------------------------------------------------
UnparsedFlagAccessError Traceback (most recent call last)
<command-35675914> in <module>
16 label_list = [6,1,2,4,3,5,0]
17 # Convert our test features to InputFeatures that BERT understands.
---> 18 test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
19
20 input_ids_list = [x.input_ids for x in test_features]
/databricks/python/lib/python3.7/site-packages/bert/run_classifier.py in convert_examples_to_features(examples, label_list, max_seq_length, tokenizer)
778
779 feature = convert_single_example(ex_index, example, label_list,
--> 780 max_seq_length, tokenizer)
781
782 features.append(feature)
/databricks/python/lib/python3.7/site-packages/bert/run_classifier.py in convert_single_example(ex_index, example, label_list, max_seq_length, tokenizer)
394 label_map[label] = i
395
--> 396 tokens_a = tokenizer.tokenize(example.text_a)
397 tokens_b = None
Metadata
Metadata
Assignees
Labels
No labels