Skip to content
This repository was archived by the owner on Sep 25, 2025. It is now read-only.
This repository was archived by the owner on Sep 25, 2025. It is now read-only.

Error: Trying to access flag --preserve_unused_tokens before flags were parsed  #1133

@rv-ltran

Description

@rv-ltran

I have been using the following code fine until this morning. I got an error for using bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

Please let me know how to fix it

import pandas as pd
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
from tensorflow.contrib import predictor
import pkg_resources
pkg_resources.get_distribution("bert-tensorflow").version


input_words = "Hello"

DATA_COLUMN = "message"
LABEL_COLUMN = "category_label"


test = pd.DataFrame({DATA_COLUMN: [input_words], LABEL_COLUMN : [0]})

BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                               text_a = x[DATA_COLUMN], 
                                                               text_b = None, 
                                                               label = x[LABEL_COLUMN]), axis = 1)

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
label_list = [6,1,2,4,3,5,0]
# Convert our test features to InputFeatures that BERT understands.
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)


Error:

INFO:tensorflow:Writing example 0 of 1
INFO:tensorflow:Writing example 0 of 1
UnparsedFlagAccessError: Trying to access flag --preserve_unused_tokens before flags were parsed.
---------------------------------------------------------------------------
UnparsedFlagAccessError                   Traceback (most recent call last)
<command-35675914> in <module>
     16 label_list = [6,1,2,4,3,5,0]
     17 # Convert our test features to InputFeatures that BERT understands.
---> 18 test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
     19 
     20 input_ids_list = [x.input_ids for x in test_features]

/databricks/python/lib/python3.7/site-packages/bert/run_classifier.py in convert_examples_to_features(examples, label_list, max_seq_length, tokenizer)
    778 
    779     feature = convert_single_example(ex_index, example, label_list,
--> 780                                      max_seq_length, tokenizer)
    781 
    782     features.append(feature)

/databricks/python/lib/python3.7/site-packages/bert/run_classifier.py in convert_single_example(ex_index, example, label_list, max_seq_length, tokenizer)
    394     label_map[label] = i
    395 
--> 396   tokens_a = tokenizer.tokenize(example.text_a)
    397   tokens_b = None

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions