Commit 85236c3a authored by Raul Sirel

Merge branch 'initial_idea_for_worker'

parents c2a5fadb be856e41
Pipeline #4456 canceled
@@ -3,6 +3,7 @@ image: debian:buster
stages:
  - test
  - build
  - build-worker

Test:
  before_script:
@@ -26,3 +27,14 @@ Build:
    - twine upload dist/*
  only:
    - tags

BuildWorker:
  stage: build-worker
  tags:
    - docker
  script:
    - docker login -u $CI_DEPLOY_USER -p $CI_DEPLOY_PASSWORD docker.texta.ee
    - sh ./worker/build_and_push.sh
    - docker system prune --volumes -f
  only:
    - tags
@@ -19,3 +19,5 @@ dependencies:
  - stanza==1.1.*
  - regex
  - phonenumberslite
  - celery==4.*
  - redis==3.*
@@ -7,4 +7,6 @@ langdetect
lang-trans
regex
pytest
phonenumberslite
\ No newline at end of file
phonenumberslite
redis==3.*
celery==4.*
\ No newline at end of file
FROM debian:buster

RUN set -x \
    && apt-get update \
    && apt-get install -y --no-install-recommends python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install http://pypi.texta.ee/texta-mlp/texta-mlp-latest.tar.gz

WORKDIR /var/texta-rest
ENTRYPOINT ["/var/texta-rest/docker/conf/entrypoint.sh"]
CMD ["supervisord", "-n"]
#!/bin/bash
# retrieve version from file
version_file="./VERSION"
version=$(cat "$version_file")
# build latest image
docker build --compress --force-rm --no-cache -t docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest -f ./worker/cpu.Dockerfile ./worker
# build latest GPU image
#docker build --compress --force-rm --no-cache -t docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest-gpu -f ./worker/gpu.Dockerfile ./worker
# tag version
#docker tag docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest docker.texta.ee/texta/texta-mlp-python/mlp-worker:$version
#docker tag docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest-gpu docker.texta.ee/texta/texta-mlp-python/mlp-worker:$version-gpu
# push version tag
#docker push docker.texta.ee/texta/texta-mlp-python/mlp-worker:$version
#docker push docker.texta.ee/texta/texta-mlp-python/mlp-worker:$version-gpu
# push latest tag
docker push docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest
#docker push docker.texta.ee/texta/texta-mlp-python/mlp-worker:latest-gpu
FROM continuumio/miniconda3:latest
# create dir for MLP
RUN mkdir /var/texta-mlp
WORKDIR /var/texta-mlp
# install requirements
COPY ./environment-cpu.yaml ./environment.yaml
RUN conda env create -f environment.yaml \
# conda clean up
&& conda clean -afy \
&& find /opt/conda/ -follow -type f -name '*.a' -delete \
&& find /opt/conda/ -follow -type f -name '*.pyc' -delete \
&& find /opt/conda/ -follow -type f -name '*.js.map' -delete
# copy files
COPY ./supervisord.conf /opt/conda/envs/texta-mlp/etc/supervisord/conf.d/supervisord.conf
COPY ./entrypoint.sh ./entrypoint.sh
COPY ./settings.py ./settings.py
COPY ./taskman.py ./taskman.py
# create dir for data
RUN mkdir /var/texta-mlp/data
# Ownership to www-data and entrypoint
RUN chown -R www-data:www-data /var/texta-mlp \
&& chmod 775 -R /var/texta-mlp \
&& chmod +x /var/texta-mlp/entrypoint.sh \
&& rm -rf /root/.cache
ENTRYPOINT ["/var/texta-mlp/entrypoint.sh"]
CMD ["supervisord", "-n"]
version: '3'

services:

  mlp-redis:
    container_name: mlp-redis
    image: redis:latest
    ports:
      - 6379
    restart: always

  texta-mlp-worker:
    container_name: texta-mlp-worker
    build:
      context: .
      dockerfile: ./cpu.Dockerfile
    volumes:
      - mlp-data:/var/texta-mlp/data

volumes:
  mlp-data:
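A minimal local usage sketch (not part of the commit), assuming the commands are run from the worker/ directory that contains the compose file above:

# Build the worker image and start Redis plus the MLP worker in the background.
docker-compose build
docker-compose up -d
# Follow the worker logs. TEXTA_MLP_TASK_WORKERS (default 4, see entrypoint.sh below)
# sets the Celery concurrency used by the supervisord command further down.
docker-compose logs -f texta-mlp-worker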
#!/bin/bash
# Set default for MLP workers
export TEXTA_MLP_TASK_WORKERS="${TEXTA_MLP_TASK_WORKERS:-4}"
# activate env
source activate texta-mlp
exec "$@"
name: texta-mlp
channels:
  - conda-forge
dependencies:
  - python=3.7
  - pip
  - supervisor
  - lxml
  - pip:
    - texta-mlp
    - celery==5.*
    - redis==3.*
    - torch==1.5.1+cpu
    - -f https://download.pytorch.org/whl/torch_stable.html
name: texta-mlp
channels:
  - conda-forge
dependencies:
  - python=3.7
  - pip
  - supervisor
  - lxml
  - pip:
    - texta-mlp
    - celery==5.*
    - redis==3.*
import os
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# env variables
MLP_WORKER_LANGUAGE_CODES = os.getenv("MLP_WORKER_LANGUAGE_CODES", "et").split(",")
MLP_WORKER_DEFAULT_LANGUAGE_CODE = os.getenv("MLP_WORKER_DEFAULT_LANGUAGE_CODE", "et")
MLP_WORKER_RESOURCE_DIR = os.getenv("MLP_WORKER_RESOURCE_DIR", "/var/data")
MLP_WORKER_BROKER = os.getenv("MLP_WORKER_BROKER", "redis://mlp-redis:6379/0")
MLP_WORKER_RESULT_BACKEND = os.getenv("MLP_WORKER_RESULT_BACKEND", "redis://mlp-redis:6379/0")
[supervisord]
nodaemon=true
user=root
[program:mlp]
command=celery -A taskman worker --concurrency=%(ENV_TEXTA_MLP_TASK_WORKERS)s -l info -O fair -Q mlp_queue -n mlp-worker
directory=/var/texta-mlp
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
user=www-data
from celery import shared_task
from celery import Celery
from typing import Optional
import logging

from texta_mlp.mlp import MLP
from settings import (
    MLP_WORKER_LANGUAGE_CODES,
    MLP_WORKER_DEFAULT_LANGUAGE_CODE,
    MLP_WORKER_RESOURCE_DIR,
    MLP_WORKER_BROKER,
    MLP_WORKER_RESULT_BACKEND
)

# Create Celery app with proper conf
app = Celery("worker")
app.conf.broker_url = MLP_WORKER_BROKER
app.conf.result_backend = MLP_WORKER_RESULT_BACKEND

# start logging
logging.basicConfig(
    format='%(levelname)s %(asctime)s: %(message)s',
    datefmt='%d.%m.%Y %H:%M:%S',
    level=logging.INFO
)

# Global MLP object for the worker so it won't get reloaded on each task
ml_processor: Optional[MLP] = None


def load_mlp():
    global ml_processor
    logging.info("Start loading MLP")
    if ml_processor is None:
        ml_processor = MLP(
            language_codes=MLP_WORKER_LANGUAGE_CODES,
            default_language_code=MLP_WORKER_DEFAULT_LANGUAGE_CODE,
            resource_dir=MLP_WORKER_RESOURCE_DIR
        )
    logging.info("Successfully loaded MLP")


@shared_task
def mlp(document: list, field: list, analyzers: list):
    try:
        load_mlp()
        processed = ml_processor.process_docs(docs=document, doc_paths=field, analyzers=analyzers)
        return processed
    except Exception as e:
        return e
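A minimal client-side sketch (not part of the commit), assuming the worker is started with the supervisord command above, so the task is registered under Celery's default name taskman.mlp and consumes mlp_queue; the document, field and analyzer values are purely illustrative:

from celery import Celery

# Same broker/backend defaults as settings.py above.
client = Celery("client", broker="redis://mlp-redis:6379/0", backend="redis://mlp-redis:6379/0")

result = client.send_task(
    "taskman.mlp",                                 # default name of the @shared_task above (assumed)
    kwargs={
        "document": [{"text": "Tere, maailm!"}],   # example payload, not from the commit
        "field": ["text"],                         # doc_paths into each document (illustrative)
        "analyzers": ["all"],                      # analyzer selection (illustrative)
    },
    queue="mlp_queue",                             # matches -Q mlp_queue in supervisord.conf
)
print(result.get(timeout=600))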