diff --git a/README.md b/README.md index 3a9a513f537b85e9cb7fff3e04c2a9cc71f7491e..95fd7fbd0e79857cf25452263a10e3188c775857 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ # Fast Word2Vec TF -Fast implementation of Word2Vec in Tensorflow \ No newline at end of file +Fast implementation of Word2Vec in Tensorflow + +## Run Tests + +fast_TF_Word2Vec$python3 -m model.tests.test \ No newline at end of file diff --git a/model/__init__.py b/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..75ea5b5f5b82b48e6126dce45de834f3801ebc42 --- /dev/null +++ b/model/__init__.py @@ -0,0 +1 @@ +from .core.model import * diff --git a/model/__init__.pyc b/model/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..024dea7693f56e6e4bc83914ee18c7f66122b9bd Binary files /dev/null and b/model/__init__.pyc differ diff --git a/model/__pycache__/__init__.cpython-36.pyc b/model/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72e614988bc90ee05f3030c6be35d0f8b92520a2 Binary files /dev/null and b/model/__pycache__/__init__.cpython-36.pyc differ diff --git a/model/__pycache__/model.cpython-36.pyc b/model/__pycache__/model.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70cb1a535a5b83af06f766e53ed121fbcba364bf Binary files /dev/null and b/model/__pycache__/model.cpython-36.pyc differ diff --git a/model/core/__init__.py b/model/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/model/core/__init__.pyc b/model/core/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d86d42982ba96ebca2de06bf76f85c9af76a3b83 Binary files /dev/null and b/model/core/__init__.pyc differ diff --git a/model/core/__pycache__/__init__.cpython-36.pyc b/model/core/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 
def logp(x, y, embedx, embedy, device='/device:GPU:0'):
    """Return the mean log-probability of context words given central words.

    Each (central, context) pair is scored by the dot product of its two
    embedding vectors. That score is normalized by a log-sum-exp taken over
    the scores of the central word against every row of ``embedy``, which
    makes the result a log-softmax over all rows of ``embedy``.

    Args:
        x: Indices of the central words, used to gather rows of ``embedx``.
            Assumed to be 1-D of length N_BATCH (per the shape notes kept
            from the original code) -- TODO confirm against callers.
        y: Indices of the context words, used to gather rows of ``embedy``.
            Assumed to be 1-D of length N_BATCH.
        embedx: Embedding table for central words; assumed (N_WORDS, N_DIM).
        embedy: Embedding table for context words; assumed (N_WORDS, N_DIM).
        device: Device string handed to ``tf.device``; all ops below are
            created inside that device scope.

    Returns:
        A scalar tensor holding the batch mean of ``log p(y | x)``.
    """
    with tf.device(device):
        # Embedded vectors: x is the central word, y is the context word.
        context_vecs = tf.gather(embedy, y)  # (N_BATCH, N_DIM)
        central_vecs = tf.gather(embedx, x)  # (N_BATCH, N_DIM)

        # Unnormalized log-score of each (y, x) pair as a batched
        # (1, N_DIM) @ (N_DIM, 1) product.
        pair_scores = tf.matmul(
            tf.expand_dims(central_vecs, 1),   # (N_BATCH, 1, N_DIM)
            tf.expand_dims(context_vecs, -1),  # (N_BATCH, N_DIM, 1)
        )  # (N_BATCH, 1, 1)
        # Squeeze only the two singleton axes created above. Squeezing every
        # size-1 axis would also drop the batch axis when N_BATCH == 1.
        pair_scores = tf.squeeze(pair_scores, axis=[1, 2])  # (N_BATCH,)

        # Log-normalizer for each central word: scores against every row of
        # embedy, reduced over the word axis.
        all_scores = tf.matmul(embedy, tf.transpose(central_vecs))  # (N_WORDS, N_BATCH)
        log_norm = tf.reduce_logsumexp(all_scores, axis=0)  # (N_BATCH,)

        # log p(y | x) per pair, then averaged over the batch. The result is
        # bound to a name other than ``logp`` so the function name is not
        # rebound inside its own body.
        cond_logp = pair_scores - log_norm  # (N_BATCH,)
        mean_logp = tf.reduce_mean(cond_logp)
    return mean_logp
class TestTensorflowSetup(unittest.TestCase):
    """Sanity checks on the TensorFlow installation the model code runs on."""

    def test_version2x(self):
        # `tf` reaches this module through the star import of the core model.
        installed_version = tf.version.VERSION
        # The model code targets the 2.x API, so the version must begin with '2'.
        is_major_two = installed_version.startswith('2')
        self.assertTrue(is_major_two)