Skip to content

Commit 03b3f8e

Browse files
authored
Merge pull request #27 from jstammers/update/custom-embeddings
Enable Custom Word and Sentence Embedding Models
2 parents 55ae3db + 98dfd8f commit 03b3f8e

4 files changed

Lines changed: 192 additions & 106 deletions

File tree

docs/tutorials/pyTorchWorkflow.ipynb

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@
124124
"- pyTorch Sentence transformers : `{'smpnet','st5','sdistilroberta','sminilm','sent_glove'}`\n",
125125
"- pyTorch Word transformers :`{'bert', 'distilbert', 'roberta', 'xlnet', 'albert'}`\n",
126126
"\n",
127+
"Custom Word or Sentence embedding models can be specified using a file path or HuggingFace identifier and a corresponding argument to `emb.build_blocks`\n",
128+
"- Custom Sentence transformers: `vectorizer='model_name'` and `emb.build_blocks(..., custom_pretrained_model='sentence')`\n",
129+
"- Custom Word transformers: `vectorizer='model_name'` and `emb.build_blocks(..., custom_pretrained_model='word')`\n",
130+
"\n",
127131
"## FAISS\n",
128132
"\n",
129133
"faiss.IndexIVFFlat is an implementation of an inverted file index with coarse quantization. This index is used to efficiently search for nearest neighbors of a query vector in a large dataset of vectors. Here's a brief explanation of the parameters used in this index:\n"
@@ -11542,7 +11546,7 @@
1154211546
0.041863806545734406,
1154311547
-0.19797062873840332,
1154411548
0.08422613888978958,
11545-
-0.0004040927451569587,
11549+
-4.040927451569587E-4,
1154611550
0.10142297297716141,
1154711551
-0.1592729687690735,
1154811552
-0.13892339169979095,
@@ -12278,7 +12282,7 @@
1227812282
0.01203717477619648,
1227912283
0.03730224072933197,
1228012284
0.00999721884727478,
12281-
-0.0000967714368016459,
12285+
-9.67714368016459E-5,
1228212286
-0.0012333603808656335,
1228312287
0.0049127815291285515,
1228412288
-0.042411092668771744,
@@ -12326,7 +12330,7 @@
1232612330
-0.1373370885848999,
1232712331
-0.09738717973232269,
1232812332
-0.08944021910429001,
12329-
-0.000026221718144370243,
12333+
-2.6221718144370243E-5,
1233012334
0.1785321682691574,
1233112335
-0.14823342859745026,
1233212336
0.29911166429519653,
@@ -12496,7 +12500,7 @@
1249612500
-0.15336649119853973,
1249712501
-0.17148637771606445,
1249812502
-0.04576775059103966,
12499-
-0.0002886982692871243,
12503+
-2.886982692871243E-4,
1250012504
0.05859009176492691,
1250112505
0.02274763211607933,
1250212506
-0.21441605687141418,
@@ -12794,7 +12798,7 @@
1279412798
0.020745988935232162,
1279512799
-0.3763636648654938,
1279612800
-0.39480143785476685,
12797-
0.0006196317262947559,
12801+
6.196317262947559E-4,
1279812802
-0.3080458641052246,
1279912803
0.05050341784954071,
1280012804
0.04591885954141617,
@@ -14196,7 +14200,7 @@
1419614200
-0.23650690913200378,
1419714201
-0.2087535709142685,
1419814202
-0.2256527990102768,
14199-
-0.0006704668630845845,
14203+
-6.704668630845845E-4,
1420014204
-0.23260481655597687,
1420114205
0.059237588196992874,
1420214206
0.05719619616866112,
@@ -14286,7 +14290,7 @@
1428614290
0.10674760490655899,
1428714291
-0.058976732194423676,
1428814292
0.09496849775314331,
14289-
0.00004359266677056439,
14293+
4.359266677056439E-5,
1429014294
-0.22975105047225952,
1429114295
-0.27638518810272217,
1429214296
0.09805252403020859,
@@ -14691,7 +14695,7 @@
1469114695
-0.23680473864078522,
1469214696
-0.07355748116970062,
1469314697
-0.2165745049715042,
14694-
0.0005726073868572712,
14698+
5.726073868572712E-4,
1469514699
-0.07698024809360504,
1469614700
0.054467570036649704,
1469714701
0.3693055808544159,
@@ -14762,7 +14766,7 @@
1476214766
-0.034802526235580444,
1476314767
0.12485354393720627,
1476414768
0.20356221497058868,
14765-
-0.0005281756748445332,
14769+
-5.281756748445332E-4,
1476614770
0.09996781498193741,
1476714771
0.067182257771492,
1476814772
-0.2062978595495224,
@@ -14936,7 +14940,7 @@
1493614940
0.32058626413345337,
1493714941
-0.3016047179698944,
1493814942
-0.08911110460758209,
14939-
-0.0007734508835710585,
14943+
-7.734508835710585E-4,
1494014944
-0.07933899015188217,
1494114945
-0.29577797651290894,
1494214946
-0.3050692677497864,
@@ -15181,7 +15185,7 @@
1518115185
-0.2573504149913788,
1518215186
0.03609131649136543,
1518315187
-0.0062813530676066875,
15184-
0.00045815910561941564,
15188+
4.5815910561941564E-4,
1518515189
0.14266984164714813,
1518615190
0.10829097777605057,
1518715191
0.13612718880176544,
@@ -15607,7 +15611,7 @@
1560715611
-0.28111472725868225,
1560815612
0.20452271401882172,
1560915613
0.08420056104660034,
15610-
0.0007678090478293598,
15614+
7.678090478293598E-4,
1561115615
-0.20812328159809113,
1561215616
-0.3826219141483307,
1561315617
-0.05620969459414482,
@@ -43579,7 +43583,7 @@
4357943583
0.04186234250664711,
4358043584
-0.1979692131280899,
4358143585
0.08422474563121796,
43582-
-0.00040560279740020633,
43586+
-4.0560279740020633E-4,
4358343587
0.101422019302845,
4358443588
-0.15927384793758392,
4358543589
-0.1389235258102417,
@@ -44315,7 +44319,7 @@
4431544319
0.012037341482937336,
4431644320
0.03730267286300659,
4431744321
0.009998459368944168,
44318-
-0.00009480538574280217,
44322+
-9.480538574280217E-5,
4431944323
-0.0012314494233578444,
4432044324
0.0049147047102451324,
4432144325
-0.042409781366586685,
@@ -44363,7 +44367,7 @@
4436344367
-0.13733446598052979,
4436444368
-0.09738475829362869,
4436544369
-0.08943670243024826,
44366-
-0.00002462582051521167,
44370+
-2.462582051521167E-5,
4436744371
0.1785307079553604,
4436844372
-0.14823150634765625,
4436944373
0.2991122305393219,
@@ -44533,7 +44537,7 @@
4453344537
-0.15336599946022034,
4453444538
-0.1714865267276764,
4453544539
-0.045769669115543365,
44536-
-0.0002892519987653941,
44540+
-2.892519987653941E-4,
4453744541
0.058590117841959,
4453844542
0.022746196016669273,
4453944543
-0.21441666781902313,
@@ -44831,7 +44835,7 @@
4483144835
0.02074439823627472,
4483244836
-0.37636175751686096,
4483344837
-0.39480140805244446,
44834-
0.0006176820024847984,
44838+
6.176820024847984E-4,
4483544839
-0.3080475628376007,
4483644840
0.050506748259067535,
4483744841
0.04591672495007515,
@@ -45800,7 +45804,7 @@
4580045804
0.38535815477371216,
4580145805
0.07165347784757614,
4580245806
0.2231462001800537,
45803-
0.0009984212229028344,
45807+
9.984212229028344E-4,
4580445808
0.2918051481246948,
4580545809
0.28693827986717224,
4580645810
-0.09961967915296555,
@@ -46232,7 +46236,7 @@
4623246236
-0.23650597035884857,
4623346237
-0.20875486731529236,
4623446238
-0.22565750777721405,
46235-
-0.0006674157339148223,
46239+
-6.674157339148223E-4,
4623646240
-0.23261016607284546,
4623746241
0.05923283472657204,
4623846242
0.05719119682908058,
@@ -46322,7 +46326,7 @@
4632246326
0.10675148665904999,
4632346327
-0.058976758271455765,
4632446328
0.09496378153562546,
46325-
0.00003729849049705081,
46329+
3.729849049705081E-5,
4632646330
-0.22975380718708038,
4632746331
-0.2763764262199402,
4632846332
0.09804049134254456,
@@ -46727,7 +46731,7 @@
4672746731
-0.2367977499961853,
4672846732
-0.07355692982673645,
4672946733
-0.2165636569261551,
46730-
0.0005692781996913254,
46734+
5.692781996913254E-4,
4673146735
-0.07698062807321548,
4673246736
0.054483864456415176,
4673346737
0.369301974773407,
@@ -46798,7 +46802,7 @@
4679846802
-0.03479684516787529,
4679946803
0.12484855204820633,
4680046804
0.2035500407218933,
46801-
-0.0005334240850061178,
46805+
-5.334240850061178E-4,
4680246806
0.09996215999126434,
4680346807
0.06717655062675476,
4680446808
-0.20629936456680298,
@@ -46972,7 +46976,7 @@
4697246976
0.3205871284008026,
4697346977
-0.30160635709762573,
4697446978
-0.08910681307315826,
46975-
-0.0007655520457774401,
46979+
-7.655520457774401E-4,
4697646980
-0.07933466136455536,
4697746981
-0.2957795262336731,
4697846982
-0.30506977438926697,
@@ -47217,7 +47221,7 @@
4721747221
-0.25735044479370117,
4721847222
0.03609400615096092,
4721947223
-0.00628046365454793,
47220-
0.0004585284332279116,
47224+
4.585284332279116E-4,
4722147225
0.14267142117023468,
4722247226
0.10829322040081024,
4722347227
0.13612817227840424,
@@ -47643,7 +47647,7 @@
4764347647
-0.2811163365840912,
4764447648
0.20452405512332916,
4764547649
0.08419119566679001,
47646-
0.0007738231215626001,
47650+
7.738231215626001E-4,
4764747651
-0.20812755823135376,
4764847652
-0.3826282024383545,
4764947653
-0.05621104687452316,
@@ -47889,7 +47893,7 @@
4788947893
-0.19475585222244263,
4789047894
-0.28821855783462524,
4789147895
-0.295050710439682,
47892-
-0.00009316201612818986,
47896+
-9.316201612818986E-5,
4789347897
0.11140735447406769,
4789447898
0.09493596851825714,
4789547899
-0.051914915442466736,
@@ -48251,7 +48255,7 @@
4825148255
0.2557325065135956,
4825248256
0.29231905937194824,
4825348257
-0.15225599706172943,
48254-
-0.0006214584573172033,
48258+
-6.214584573172033E-4,
4825548259
0.04559261351823807,
4825648260
0.37871798872947693,
4825748261
0.2513127326965332,
@@ -48793,7 +48797,7 @@
4879348797
-0.08106150478124619,
4879448798
-0.0757635086774826,
4879548799
-0.0913897454738617,
48796-
-0.000560691230930388,
48800+
-5.60691230930388E-4,
4879748801
-0.22720053791999817,
4879848802
0.21542513370513916,
4879948803
-0.05549247935414314,
@@ -48890,7 +48894,7 @@
4889048894
-0.17256703972816467,
4889148895
-0.22397547960281372,
4889248896
-0.21700340509414673,
48893-
0.0004682873550336808,
48897+
4.682873550336808E-4,
4889448898
0.15235066413879395,
4889548899
-0.20267672836780548,
4889648900
0.16648273169994354,
@@ -49655,7 +49659,7 @@
4965549659
0.12497597932815552,
4965649660
-0.09104467183351517,
4965749661
0.21928001940250397,
49658-
0.00009569602116243914,
49662+
9.569602116243914E-5,
4965949663
-0.0020389629062265158,
4966049664
-0.16811877489089966,
4966149665
0.017217257991433144,

0 commit comments

Comments
 (0)