Skip to content

Commit 3201fcc

Browse files
committed
Fixed pyproject.toml so that pyjedai works on Colab: gensim is now an optional dependency (GENSIM PROBLEM)
1 parent 19abcec commit 3201fcc

2 files changed

Lines changed: 23 additions & 8 deletions

File tree

pyproject.toml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
66

77
[project]
88
name = "pyjedai"
9-
version = "0.3.2"
9+
version = "0.3.3"
1010
description = "An open-source library that builds powerful end-to-end Entity Resolution workflows."
1111
readme = "README.md"
1212
authors = [
@@ -40,13 +40,12 @@ classifiers = [
4040
keywords = ["deduplication", "entity-resolution", "link-discovery"]
4141
requires-python = ">=3.9"
4242
dependencies = [
43-
"gensim",
4443
"matplotlib",
4544
"networkx",
4645
"nltk",
47-
"numpy >= 1.7.0,<2.0",
46+
"numpy<3.0",
4847
"pandas",
49-
"scipy==1.12",
48+
"scipy",
5049
"seaborn",
5150
"tqdm",
5251
"transformers",
@@ -59,8 +58,13 @@ dependencies = [
5958
"ollama"
6059
]
6160

61+
6262
[project.optional-dependencies]
6363
dev = ["pip-tools", "pytest"]
64+
with-gensim = [
65+
"numpy>=1.7, < 2.0",
66+
"gensim",
67+
"scipy==1.12"]
6468

6569
[project.urls]
6670
"Homepage" = "http://pyjedai.rtfd.io"

src/pyjedai/vector_based_blocking.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,13 @@
1515
import platform
1616
RUNNING_OS = platform.system()
1717

18-
import gensim.downloader as api
18+
try:
19+
import gensim.downloader as api
20+
HAS_GENSIM = True
21+
except ImportError:
22+
HAS_GENSIM = False
23+
24+
1925
import networkx as nx
2026
import numpy as np
2127
import torch
@@ -60,11 +66,14 @@ class EmbeddingsNNBlockBuilding(PYJEDAIFeature):
6066
_method_info = "Creates a set of candidate pais for every entity id " + \
6167
"based on Embeddings creariot and Similarity search among the vectors."
6268

69+
6370
_gensim_mapping_download = {
6471
'fasttext' : 'fasttext-wiki-news-subwords-300',
6572
'glove' : 'glove-wiki-gigaword-300',
6673
'word2vec' : 'word2vec-google-news-300'
67-
}
74+
75+
} if HAS_GENSIM else {}
76+
6877
_sentence_transformer_mapping = {
6978
'smpnet' : 'all-mpnet-base-v2',
7079
'st5' : 'gtr-t5-large',
@@ -303,9 +312,11 @@ def _create_gensim_embeddings(self) -> Tuple[np.array, np.array]:
303312
Returns:
304313
Tuple[np.array, np.array]: Embeddings from D1 and D2
305314
"""
306-
vectors_1 = []
315+
if not HAS_GENSIM :
316+
raise ImportError("Reinstall pyjedai with pip install pyjedai[with-gensim] or use other vectorizer")
317+
307318
vocabulary = api.load(self._gensim_mapping_download[self.vectorizer])
308-
319+
309320
if not self._d1_loaded:
310321
for e1 in self._entities_d1:
311322
vectors_1.append(self._create_vector(e1, vocabulary))

0 commit comments

Comments
 (0)