2022-12-14

Cast string to int64 is not supported

I have a problem using the Keras subclassing API. First, I read the data from TFRecord files as follows:

# Open the training and validation TFRecord files. Caching and prefetching
# (buffer size AUTOTUNE) are applied here to the raw serialized records.
train_ds = (
    tf.data.TFRecordDataset(['./data.tfrecord'])
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    tf.data.TFRecordDataset(['./data_validate.tfrecord'])
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Parsing schema handed to tf.io.parse_single_example: one scalar int64
# label (default 0) and two scalar string features (default '').
feature_description = dict(
    label=tf.io.FixedLenFeature([], tf.int64, default_value=0),
    feature1=tf.io.FixedLenFeature([], tf.string, default_value=''),
    feature2=tf.io.FixedLenFeature([], tf.string, default_value=''),
)

def _parse_function(example_proto):
    """Decode one serialized record into a ``(features, label)`` pair.

    Args:
        example_proto: A single serialized record, parsed according to the
            module-level ``feature_description`` schema.

    Returns:
        A tuple ``(features, label)`` where ``features`` is a dict holding
        the parsed ``'feature1'`` and ``'feature2'`` tensors and ``label``
        is the parsed ``'label'`` tensor.
    """
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    features = {key: parsed[key] for key in ('feature1', 'feature2')}
    return features, parsed['label']

# Decode every serialized record with _parse_function, then group the
# decoded examples into batches of 256.
train_ds = train_ds.map(_parse_function)
train_ds = train_ds.batch(256)
val_ds = val_ds.map(_parse_function)
val_ds = val_ds.batch(256)

Second

I defined my model as follows:

class VanillaModel(tf.keras.Model):
    """Subclassed Keras model that combines two string features.

    ``feature1`` is fed through a small ``Sequential`` (lookup layer ->
    ``Embedding`` -> ``GlobalAveragePooling1D``) and ``feature2`` through a
    TF-Hub text-embedding layer. The two results are concatenated along
    axis 1 and passed through three ``Dense`` layers (with one ``Dropout``
    in between) to an output layer with ``number_of_classes`` units.
    """

    def __init__(self, number_of_classes):
        """Create all sub-layers of the model.

        Args:
            number_of_classes: Number of units of the final ``Dense`` layer.
        """
        super(VanillaModel, self).__init__()
        self.number_of_classes=number_of_classes
        # Passed to Embedding below as its first argument (vocabulary size).
        max_tokens = 100_000
        # NOTE(review): not used anywhere in the visible code -- presumably
        # intended for the `feature1_lookup` layer; confirm.
        sequence_length = 1042
        # Width of hidden_layer_1; hidden layers 2 and 3 use half of it.
        # The Embedding layer below does NOT use this value (output_dim=32).
        embedding_dim = 64

        # TF-Hub module handle loaded by hub.KerasLayer; it takes string input
        # (dtype=tf.string) and is fine-tuned during training (trainable=True).
        embedding = "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2"
        self.feature2_embedding = hub.KerasLayer(embedding, input_shape=[],
                           dtype=tf.string, name="embedding", trainable=True)

        # NOTE(review): this helper is defined but never referenced in the
        # visible code -- presumably meant to be passed as the `standardize`
        # callback of the `feature1_lookup` layer; confirm.
        def custom_standardization(input_data):
            """Lower-case the text, replace ``<...>`` tags with a space and
            remove all ``string.punctuation`` characters."""
            lowercase = tf.strings.lower(input_data)
            stripped_html = tf.strings.regex_replace(
                lowercase, r'<[^<>]*>', ' ')

            return tf.strings.regex_replace(stripped_html,
                                            '[%s]' % re.escape(string.punctuation), '')

        # NOTE(review): `feature1_lookup` is not defined in this snippet. It
        # must map raw strings to integer token ids for the Embedding layer;
        # how (and with which input dtype) it was built is not shown -- TODO
        # confirm, since the reported Cast error originates in this Sequential.
        self.feature1_embedding = tf.keras.Sequential([
            feature1_lookup,
            Embedding(max_tokens, output_dim= 32, mask_zero=True),
            GlobalAveragePooling1D()
        ])

        self.hidden_layer_1 = Dense(embedding_dim, activation='relu', name="hidden_layer_1")
        self.hidden_layer_2 = Dense(embedding_dim//2, activation='relu', name="hidden_layer_2")
        self.drop_out_1 = Dropout(rate=0.15, name='drop_out_1')
        self.hidden_layer_3 = Dense(embedding_dim//2, activation='relu',name="hidden_layer_3")
        # NOTE(review): sigmoid on a `number_of_classes`-wide output; softmax
        # is the usual choice for single-label multi-class training -- confirm
        # this is intentional.
        self.output_layer = Dense(number_of_classes, activation='sigmoid', name="output_layer")

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Mapping with the keys ``"feature1"`` and ``"feature2"``.

        Returns:
            The result of ``output_layer`` applied to the processed,
            concatenated embeddings.
        """
        # Embed each feature separately and join the results along axis 1.
        x = tf.concat([
            self.feature1_embedding(inputs["feature1"]),
            self.feature2_embedding(inputs["feature2"])
        ], axis=1)
        
        x = self.hidden_layer_1(x)
        x = self.hidden_layer_2(x)
        x = self.drop_out_1(x)
        x = self.hidden_layer_3(x)
        output=self.output_layer(x)
        return output

Finally

I instantiate the model, compile it, and pass the datasets to model.fit as follows:

# Write TensorBoard event files to the "logs" directory during training.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")
steps_per_epoch = 85
# VanillaModel is the tf.keras.Model subclass defined earlier in this post;
# number_of_classes must be defined before this point.
model = VanillaModel(number_of_classes)
# The output layer applies its own activation, so the loss is told that it
# does not receive raw logits (from_logits=False).
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(
                  from_logits=False),
              metrics=['accuracy'])
# Train for 30 epochs of `steps_per_epoch` batches each, evaluating on the
# validation dataset and logging to TensorBoard.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    callbacks=[tensorboard_callback])

However, I receive this error: "Cast string to int64 is not supported".

"name": "UnimplementedError",
    "message": "Graph execution error:\n\nDetected at node 'vanilla_model_22/sequential_45/Cast' defined at (most recent call last):\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\runpy.py\", line 196, in _run_module_as_main\n      
    return _run_code(code, main_globals, None,\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\runpy.py\", line 86, in _run_code\n      exec(code, run_globals)\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ipykernel_launcher.py\", line 17, in <module>\n      app.launch_new_instance()\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\traitlets\\config\\application.py\", line 965, in launch_instance\n      app.start()\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ipykernel\\kernelapp.py\", line 712, in start\n      self.io_loop.start()\n    
    File \"c:\\Users\\fakeuser\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\tornado\\platform\\asyncio.py\", line 199, in start\n      
    self.asyncio_loop.run_forever()
.
.
.
.
tensor = tf.cast(tensor, dtype=ref_input.dtype)\nNode: 'problem_type_model_22/sequential_45/Cast'\nCast string to int64 is not supported\n\t [[]] [Op:__inference_train_function_32172]

My understanding is that I need to parse my dataset into a tuple of (features, label), which is what I am doing in _parse_function. Also, when I added a print statement in the call function, I found that it works just fine for the first two examples and errors out after that. I don't know why TensorFlow throws the "Cast string to int64 is not supported" exception. I would appreciate your help if you could spot where the error is in my code. Thank you in advance for taking the time to look at my post.

tensorflow version: 2.8.3



No comments:

Post a Comment