Skip to content

Commit 51c8924

Browse files
ramir1 and djklim87 authored
Installing the Ukrainian Lemmatizer (#121)
* Installing the Ukrainian Lemmatizer --------- Co-authored-by: djklim87 <klim@manticoresearch.com>
1 parent 96be981 commit 51c8924

2 files changed

Lines changed: 56 additions & 3 deletions

File tree

Dockerfile

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ RUN groupadd -r manticore && useradd -r -g manticore manticore
1010

1111
ENV GOSU_VERSION 1.11
1212

13-
ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \
13+
ENV DAEMON_URL ${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \
1414
https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server-core_13.13.0-25100704-e5465fe44__ARCH_64.deb \
1515
https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-backup_1.9.6+25070510-5247d066_all.deb \
1616
https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-buddy_3.35.1+25090418-41d9811f_all.deb \
@@ -70,7 +70,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then export ARCH="arm"; else expo
7070
&& wget -q https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \
7171
&& dpkg -i manticore-dev-repo.noarch.deb \
7272
&& apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update \
73-
&& apt-get -y install manticore manticore-extra manticore-load manticore-language-packs;\
73+
&& apt-get -y install manticore manticore-extra manticore-load manticore-lemmatizer-uk manticore-language-packs;\
7474
elif [ ! -z "$DAEMON_URL" ]; then \
7575
echo "2nd step of building release image for linux/${ARCH}64 architecture" \
7676
&& echo "ARCH: ${ARCH}" \
@@ -98,6 +98,15 @@ RUN if [ -d "/packages/" ]; then apt -y install /packages/*deb; fi \
9898
&& tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/ \
9999
&& rm /tmp/*.pak.tgz
100100

101+
# Installing the Ukrainian Lemmatizer using the working Jammy approach
102+
RUN apt-get update && apt-get install -y software-properties-common curl && \
103+
add-apt-repository -y ppa:deadsnakes/ppa && \
104+
apt-get update && \
105+
apt-get install -y python3.9 python3.9-dev python3.9-distutils && \
106+
curl https://bootstrap.pypa.io/get-pip.py | python3.9 && \
107+
python3.9 -m pip install pymorphy2 pymorphy2-dicts-uk && \
108+
apt-get clean && rm -rf /var/lib/apt/lists/*
109+
101110
COPY manticore.conf.sh /etc/manticoresearch/
102111
RUN sed -i '/log = \/var\/log\/manticore\/searchd.log/d;/query_log = \/var\/log\/manticore\/query.log/d' /etc/manticoresearch/manticore.conf
103112
RUN md5sum /etc/manticoresearch/manticore.conf | awk '{print $1}' > /manticore.conf.md5
@@ -122,7 +131,8 @@ EXPOSE 9308
122131
EXPOSE 9312
123132
ENV LANG C.UTF-8
124133
ENV LC_ALL C.UTF-8
125-
ENV MANTICORE_CONFIG="/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf"
134+
ENV PYTHONWARNINGS "ignore::UserWarning:pymorphy2.analyzer"
135+
ENV MANTICORE_CONFIG "/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf"
126136
CMD ["searchd", "-c", "/etc/manticoresearch/manticore.conf.sh", "--nodetach"]
127137

128138
# How to build manually:
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
––– block: ./base/init –––
2+
––– input –––
3+
docker run -d --name manticore manticoresoftware/manticore:current
4+
––– output –––
5+
#!/[0-9a-z]+/!#
6+
––– input –––
7+
if timeout 5 grep -qm1 'accepting connections' <(docker logs -f manticore); then echo 'accepting connections'; else echo 'Manticore failed to start properly in 10 seconds'; fi
8+
––– output –––
9+
accepting connections
10+
––– input –––
11+
docker exec manticore mysql -h0 -P9306 -e "CREATE TABLE test_uk (id bigint, content text) rt_mem_limit = '256M' morphology = 'lemmatize_uk' charset_table = '0..9, A..Z->a..z, _, a..z, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+0454, U+0456, U+0457, U+0491';"
12+
––– output –––
13+
––– input –––
14+
docker exec manticore mysql -h0 -P9306 -e "INSERT INTO test_uk (id, content) VALUES (1, 'бігаю'), (2, 'муркотіти'), (3, 'їжа');"
15+
––– output –––
16+
––– input –––
17+
docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('бігаю', 'test_uk');"
18+
––– output –––
19+
+------+------------+--------------+
20+
| qpos | tokenized | normalized |
21+
+------+------------+--------------+
22+
| 1 | бігаю | бігати |
23+
+------+------------+--------------+
24+
––– input –––
25+
docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('їжа', 'test_uk');"
26+
––– output –––
27+
+------+-----------+------------+
28+
| qpos | tokenized | normalized |
29+
+------+-----------+------------+
30+
| 1 | їжа | їжа |
31+
+------+-----------+------------+
32+
––– input –––
33+
docker exec manticore mysql -h0 -P9306 -e "SELECT * FROM test_uk WHERE MATCH('бігати');"
34+
––– output –––
35+
+------+------------+
36+
| id | content |
37+
+------+------------+
38+
| 1 | бігаю |
39+
+------+------------+
40+
––– input –––
41+
docker stop manticore
42+
––– output –––
43+
#!/[0-9a-z]+/!#

0 commit comments

Comments
 (0)