Source code:
# Demo: build a SubwordTextEncoder from a tiny syllable corpus, round-trip a
# string through it, then repeat the encoding with the pre-built encoder that
# ships with the "imdb_reviews/subwords8k" dataset.
#
# Notebook setup -- IPython-only commands, kept commented out so this file is
# still valid Python when executed as a plain script:
#   !pip install tensorflow==2.0.0
#   %reset -f

# libs
import logging

import tensorflow as tf
import tensorflow_datasets as tfds


# build new encoder with new syllables -----------------------------------------
def gen():
    """Yield the sample syllables that serve as the encoder's training corpus."""
    yield from ("foo", "bar", "bio", "lo", "gy", "gist")


# NOTE(review): newer tensorflow_datasets releases expose this class under
# tfds.deprecated.text instead of tfds.features.text -- confirm against the
# installed version before upgrading.
Coder = tfds.features.text.SubwordTextEncoder.build_from_corpus(
    gen(), target_vocab_size=258
)
print("Syllables:", Coder.subwords)

# string to vector
String = "foo bar biologist tf"
Vector = Coder.encode(String)
print("String:", String)
print("To vector:", Vector)

# vector back to string
print("Back to words:", Coder.decode(Vector))

# use existing encoder ---------------------------------------------------------
print("\nUse existing encoder...")
# Only let ERROR-level (and above) TensorFlow log records through.
tf.get_logger().setLevel(logging.ERROR)
Skept, Info = tfds.load("imdb_reviews/subwords8k", with_info=True)
# Replace the hand-built encoder with the one stored in the dataset metadata.
Coder = Info.features["text"].encoder

print("\nVocabulary size: {}".format(Coder.vocab_size))
print("String:", String)
# Encode the same sample string so both encoders' outputs can be compared.
print("To vector:", Coder.encode(String))
print("\nDone.")
# eof
No comments:
Post a Comment