From c185e5e7683e80a704faa8de51d9de3534d2eb9b Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Sat, 5 Aug 2023 21:27:08 -0400 Subject: [PATCH 01/40] update CI --- .github/workflows/build.yaml | 243 ++-------------------------- .github/workflows/client-tests.yaml | 25 --- .github/workflows/load_test.yaml | 108 ------------- 3 files changed, 15 insertions(+), 361 deletions(-) delete mode 100644 .github/workflows/client-tests.yaml delete mode 100644 .github/workflows/load_test.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 124e6a33ee0..f1b7012b3bb 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,246 +1,33 @@ -name: Build and push docker image to internal registry +name: Build and push docker image to github registry on: - workflow_dispatch: push: branches: - - 'main' - tags: - - 'v*' - pull_request: - paths: - - ".github/workflows/build.yaml" - - "integration-tests/**" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "Cargo.lock" - - "rust-toolchain.toml" - - "Dockerfile" - branches: - - 'main' + - main jobs: - start-runner: - name: Start self-hosted EC2 runner + build: + name: Build Images runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-03cfed9ea28f4b002 - EC2_INSTANCE_TYPE: g5.12xlarge - EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc - EC2_SECURITY_GROUP: sg-030175c435ac141d6 - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-tgi-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] - - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner permissions: - contents: write + contents: read packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Install cosign - if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0 - with: - cosign-release: 'v1.13.1' - - name: Tailscale - uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 - with: - authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 + - uses: actions/checkout@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2.2.1 + - name: Login to container registry + uses: docker/login-action@v2.1.0 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Login to internal Container Registry - uses: docker/login-action@v2.1.0 - with: - username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }} - password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }} - registry: registry.internal.huggingface.tech - - name: Login to Azure Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2.1.0 - with: - username: ${{ secrets.AZURE_DOCKER_USERNAME }} - password: ${{ secrets.AZURE_DOCKER_PASSWORD }} - registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io - # If pull request - - name: Extract metadata (tags, labels) for Docker - if: ${{ github.event_name == 'pull_request' }} - id: meta-pr - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry.internal.huggingface.tech/api-inference/community/text-generation-inference - tags: | - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }} - # If main, release or tag - - name: Extract metadata (tags, labels) for Docker - if: ${{ github.event_name != 'pull_request' }} - id: meta - uses: docker/metadata-action@v4.3.0 - with: - flavor: | - latest=auto - images: | - registry.internal.huggingface.tech/api-inference/community/text-generation-inference - ghcr.io/huggingface/text-generation-inference - db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference - tags: | - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }} - name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v3.2.0 with: context: . - file: Dockerfile push: true - platforms: 'linux/amd64' - build-args: | - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }} - labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }} - cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min - cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min - # Sign the resulting Docker image digest except on PRs. - # This will only write to the public Rekor transparency log when the Docker - # repository is public to avoid leaking data. 
- - name: Sign the published Docker image - if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: "true" - # This step uses the identity token to provision an ephemeral certificate - # against the sigstore community Fulcio instance. - run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }} - - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph - uses: aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'github' - output: 'dependency-results.sbom.json' - github-pat: ${{ secrets.GITHUB_TOKEN }} - scanners: 'vuln' - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - if: ${{ github.event_name != 'pull_request' }} - with: - image-ref: 'ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}' - format: 'sarif' - output: 'trivy-results.sarif' - severity: 'CRITICAL' - scanners: 'vuln' - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 - if: ${{ github.event_name != 'pull_request' }} - with: - sarif_file: 'trivy-results.sarif' - - integration-tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: - - start-runner - - build-and-push-image # Wait for the docker image to be built - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - DOCKER_VOLUME: /cache - steps: - - uses: actions/checkout@v2 - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Tailscale - uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 - with: - authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - - name: Prepare disks - run: | - sudo mkfs -t ext4 /dev/nvme1n1 - sudo mkdir ${{ env.DOCKER_VOLUME }} - sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }} - - name: Install - run: | - make install-integration-tests - - name: Run tests - run: | - export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }} - export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} - pytest -s -vv integration-tests - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - build-and-push-image - - integration-tests - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} + tags: | + ghcr.io/${{ github.repository }}:latest + cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:latest + cache-to: type=inline \ No newline at end of file diff --git a/.github/workflows/client-tests.yaml 
b/.github/workflows/client-tests.yaml deleted file mode 100644 index 1fa0b39d7db..00000000000 --- a/.github/workflows/client-tests.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: Python Client Tests - -on: - pull_request: - paths: - - ".github/workflows/client-tests.yaml" - - "clients/python/**" - -jobs: - run_tests: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - name: Install - run: | - cd clients/python && pip install . - - name: Run tests - run: | - pip install pytest pytest-asyncio - make python-client-tests diff --git a/.github/workflows/load_test.yaml b/.github/workflows/load_test.yaml deleted file mode 100644 index fd22e395780..00000000000 --- a/.github/workflows/load_test.yaml +++ /dev/null @@ -1,108 +0,0 @@ -name: Nightly load test - -on: - schedule: - - cron: '0 0 * * 1-5' - - pull_request: - paths: - - ".github/workflows/load_test.yaml" - branches: - - 'main' - -jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: eu-central-1 - EC2_AMI_ID: ami-0ab09c07cfd194259 - EC2_INSTANCE_TYPE: g5.12xlarge - EC2_SUBNET_ID: subnet-988fd9f2,subnet-6f56db13,subnet-6a039326 - EC2_SECURITY_GROUP: sg-072f92ae3082936c6 - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-tgi-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] - - load-tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - DOCKER_VOLUME: /cache - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Prepare disks - run: | - sudo mkfs -t ext4 /dev/nvme1n1 - sudo mkdir ${{ env.DOCKER_VOLUME }} - sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }} - - - name: Install k6 - run: | - curl https://github.com/grafana/k6/releases/download/v0.44.0/k6-v0.44.0-linux-amd64.tar.gz -L | tar xvz --strip-components 1 - - - name: Start starcoder - run: | - docker run --name tgi-starcoder --rm --gpus all -p 3000:80 -v ${{ env.DOCKER_VOLUME }}:/data -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} --pull always -d ghcr.io/huggingface/text-generation-inference:latest --model-id bigcode/starcoder --num-shard 2 --max-batch-total-tokens 32768 - sleep 10 - wget --timeout 10 --retry-on-http-error --waitretry=1 --tries=240 http://localhost:3000/health - - - name: Run k6 - run: | - ./k6 run load_tests/starcoder_load.js - - - name: Stop starcoder - if: ${{ always() }} - run: | - docker stop 
tgi-starcoder || true - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - load-tests - runs-on: ubuntu-latest - env: - AWS_REGION: eu-central-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} From 928b889b78dddb5b6653c0e210311e6e2eb16524 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Sat, 5 Aug 2023 21:31:54 -0400 Subject: [PATCH 02/40] update CI v2 --- .github/workflows/build.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f1b7012b3bb..cd7ab994897 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,12 +22,16 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Downcase repository name + id: downcase + run: | + echo "::set-output name=repository::${{ github.repository,, }}" - name: Build and push Docker image uses: docker/build-push-action@v3.2.0 with: context: . push: true tags: | - ghcr.io/${{ github.repository }}:latest + ghcr.io/${{ github.repositor }}:latest cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:latest cache-to: type=inline \ No newline at end of file From ccceb326d105b73e8cf6e7692b92681fdc8139be Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Sat, 5 Aug 2023 21:34:17 -0400 Subject: [PATCH 03/40] update CI v3 --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cd7ab994897..1b20aeaefd8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -25,7 +25,7 @@ jobs: - name: Downcase repository name id: downcase run: | - echo "::set-output name=repository::${{ github.repository,, }}" + echo "::set-output name=repository::$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]')" - name: Build and push Docker image uses: docker/build-push-action@v3.2.0 with: From a9b83ce59af5e85f2a2fe76de2f422ea5d1e18f0 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 7 Aug 2023 23:28:38 -0400 Subject: [PATCH 04/40] Modify README for fork. 
Start script for no-sudo installation

---
 .gitignore                   |  3 +-
 README.md                    | 22 ++++++++--
 server/Makefile              |  1 -
 server/Makefile-flash-att    | 17 +++-----
 server/Makefile-flash-att-v2 | 13 ------
 server/Makefile-vllm         | 13 +++---
 server/vllm_testscript.py    | 22 ++++++++++
 setup_conda_nosudo.sh        | 78 ++++++++++++++++++++++++++++++++++++
 8 files changed, 132 insertions(+), 37 deletions(-)
 delete mode 100644 server/Makefile-flash-att-v2
 create mode 100644 server/vllm_testscript.py
 create mode 100644 setup_conda_nosudo.sh

diff --git a/.gitignore b/.gitignore
index 20c9baee226..de17588f0a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .idea
 target
 router/tokenizer.json
-*__pycache__*
+.openssl
+*__pycache__*
\ No newline at end of file
diff --git a/README.md b/README.md
index 2bbb6583788..a8f4ee818d1 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ![image](https://github.com/huggingface/text-generation-inference/assets/3841370/38ba1531-ea0d-4851-b31a-a6d4ddc944b0)
 
-# Text Generation Inference
+# LTI's **Text Generation Inference** Fork
 
   GitHub Repo stars
 
-A Rust, Python and gRPC server for text generation inference. Used in production at [HuggingFace](https://huggingface.co)
-to power LLMs api-inference widgets.
+A Rust, Python and gRPC server for text generation inference.
+
+Forked from [HuggingFace](https://huggingface.co)'s [Text Generation Inference](https://github.com/huggingface/text-generation-inference/) project (prior to its re-licensing), it is commercial-friendly and licensed under Apache 2.0.
+
+## *A note on this fork*
+
+This fork was created mainly due to two reasons:
+1. Primarily, it allows us faster iteration and more flexibility, which is essential for our research uses. It also allows more control over development and documentation, crucial for our in-house uses at CMU.
+2. The fork also gives us the opportunity to maintain and update a fully open-source, commercial-friendly framework for fast LLM inference (the original goal of the project, and the intention of many contributors who helped before the re-licensing).
 
 ## Table of contents
 
@@ -77,6 +84,15 @@
 or
 
 ## Get started
 
+### *For LTI/cluster users*
+
+If you are an LTI student using one of its clusters (or generally belong to an academic cluster that doesn't have docker installed), you can side-step problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager.
Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: + +```shell +CONDA_HOME=/path/to/conda +bash setup_conda_nosudo.sh +``` + ### Docker The easiest way of getting started is using the official Docker container: diff --git a/server/Makefile b/server/Makefile index a4ce6d8b7a2..ea6bd0052c4 100644 --- a/server/Makefile +++ b/server/Makefile @@ -1,5 +1,4 @@ include Makefile-flash-att -include Makefile-flash-att-v2 include Makefile-vllm unit-tests: diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att index bc1d37ef5e2..dce53fdbcd5 100644 --- a/server/Makefile-flash-att +++ b/server/Makefile-flash-att @@ -1,16 +1,11 @@ -flash_att_commit := 3a9bfd076f98746c73362328958dbc68d145fbec +flash_attention_commit := v1.0.9 flash-attention: # Clone flash attention pip install packaging - git clone https://github.com/HazyResearch/flash-attention.git + git clone git@github.com:Dao-AILab/flash-attention.git -build-flash-attention: flash-attention - cd flash-attention && git fetch && git checkout $(flash_att_commit) - cd flash-attention && python setup.py build - cd flash-attention/csrc/rotary && python setup.py build - cd flash-attention/csrc/layer_norm && python setup.py build - -install-flash-attention: build-flash-attention - pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true - cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install \ No newline at end of file +install-flash-attention: flash-attention + pip uninstall flash-attention -y || true + cd flash-attention && git fetch && git checkout $(flash_attention_commit) + cd flash-attention && pip install . \ No newline at end of file diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2 deleted file mode 100644 index a7d633563d8..00000000000 --- a/server/Makefile-flash-att-v2 +++ /dev/null @@ -1,13 +0,0 @@ -flash_att_v2_commit := 4f285b354796fb17df8636485b9a04df3ebbb7dc - -flash-attention-v2: - # Clone flash attention - pip install packaging - git clone https://github.com/HazyResearch/flash-attention.git flash-attention-v2 - -build-flash-attention-v2: flash-attention-v2 - cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit) - cd flash-attention-v2 && python setup.py build - -install-flash-attention-v2: build-flash-attention-v2 - cd flash-attention-v2 && python setup.py install \ No newline at end of file diff --git a/server/Makefile-vllm b/server/Makefile-vllm index 9100fff4e3c..ce0c7e1ba19 100644 --- a/server/Makefile-vllm +++ b/server/Makefile-vllm @@ -1,13 +1,10 @@ -vllm_commit := 084ca75d4271f8f67be731bc58e0d41d8e0afd3a +vllm_commit := "v0.1.2" vllm: # Clone vllm - git clone https://github.com/OlivierDehaene/vllm.git + git clone https://github.com/vllm-project/vllm.git -build-vllm: vllm - cd vllm && git fetch && git checkout $(vllm_commit) - cd vllm && python setup.py build - -install-vllm: build-vllm +install-vllm: vllm pip uninstall vllm -y || true - cd vllm && python setup.py install \ No newline at end of file + cd vllm && git fetch && git checkout $(vllm_commit) + cd vllm && pip install . 
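# note: `pip install .` on the vllm checkout compiles CUDA kernels, so an nvcc
# toolchain has to be visible at build time (setup_conda_nosudo.sh provides one via
# the conda cuda-toolkit package and installs ninja beforehand to speed the build up)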
\ No newline at end of file diff --git a/server/vllm_testscript.py b/server/vllm_testscript.py new file mode 100644 index 00000000000..667fd1f243f --- /dev/null +++ b/server/vllm_testscript.py @@ -0,0 +1,22 @@ +# Tests if VLLM works correctly +import vllm +import time + +prompts = [ + 'Hello, my name is', + 'CMU\'s PhD students are', +] +sampling_params = vllm.SamplingParams(temperature=0.8, top_p=0.95) + +llm = vllm.LLM(model="openlm-research/open_llama_13b") + +# time the generation +start = time.time() +outputs = llm.generate(prompts, sampling_params) +end = time.time() +for output in outputs: + prompt = output.prompt + generated = output.outputs[0].text + print(f'Prompt: {prompt!r}, Generated: {generated!r}') +print() +print(f'Time taken: {end - start:.2f}s') \ No newline at end of file diff --git a/setup_conda_nosudo.sh b/setup_conda_nosudo.sh new file mode 100644 index 00000000000..8d8dd3f7bcf --- /dev/null +++ b/setup_conda_nosudo.sh @@ -0,0 +1,78 @@ +#!/bin/zsh +# Script for setting up a conda environment with for launching servers +# It sidesteps system-wide installations by relying on conda for most packages +# and by building openssl from source +# TODO: only got it to work with a static build of OpenSSL, which is not ideal +ENV_NAME=tgi-venv-v3 +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +TEST_EXT=true +N_THREADS=8 + +set -euo pipefail + +# check if CONDA_HOME is set and create environment +if [ -z "$CONDA_HOME" ] +then + echo "Please set CONDA_HOME to the location of your conda installation" + exit 1 +fi +source ${CONDA_HOME}/etc/profile.d/conda.sh +conda create -y -n ${ENV_NAME} python=3.9 +conda activate ${ENV_NAME} + +# # Install dependencies and gxx +conda install -y "gxx<12.0" -c conda-forge +conda install -y -c conda-forge "rust>=1.65.0" +conda install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit + +# bring in the conda environment variables forward +# (needed for proper linking) +export LD_LIBRARY_PATH=${CONDA_HOME}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH +export PATH=${CONDA_HOME}/envs/${ENV_NAME}/bin:$PATH +export CUDA_HOME=${CONDA_HOME}/envs/${ENV_NAME} + +# download and build openssl +mkdir -p /tmp/openssl +cd /tmp/openssl +wget https://www.openssl.org/source/openssl-1.1.1l.tar.gz -O openssl.tar.gz +tar -xzf openssl.tar.gz +cd openssl-1.1.1l +./config --prefix=${DIR}/.openssl --openssldir=${DIR}/.openssl +make -j $N_THREADS +make install +cd $DIR +rm -rf /tmp/openssl + +export LD_LIBRARY_PATH=${DIR}/.openssl/lib:$LD_LIBRARY_PATH +export PATH=${DIR}/.openssl/bin:$PATH + +# install base package +OPENSSL_DIR=${DIR}/.openssl \ +OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ +OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ +BUILD_EXTENSIONS=True \ + make -j $N_THREADS install + +# # install ninja for faster compilation of CUDA kernels and setup workdir +pip install ninja +cd ${DIR}/server +mkdir -p workdir + +cp Makefile-vllm workdir/Makefile +cd workdir && sleep 1 +make -j $N_THREADS install-vllm +cd ${DIR}/server +if [ "$TEST_EXT" = true ] ; then + # run vllm_testscript.py and check if it works + python3 vllm_testscript.py +fi + +# install flash attention +cp Makefile-flash-att workdir/Makefile +cd workdir && sleep 1 +make -j $N_THREADS install-flash-attention +cd ${DIR}/server +rm -rf workdir + +cd ${DIR} +make run-falcon-7b-instruct \ No newline at end of file From 8b7901ad4c179a548efb531140f10de878ecedcb Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 7 Aug 2023 23:48:59 -0400 Subject: [PATCH 05/40] update to mamba --- 
setup_conda_nosudo.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/setup_conda_nosudo.sh b/setup_conda_nosudo.sh index 8d8dd3f7bcf..8eb13fb8117 100644 --- a/setup_conda_nosudo.sh +++ b/setup_conda_nosudo.sh @@ -8,7 +8,7 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" TEST_EXT=true N_THREADS=8 -set -euo pipefail +set -eo pipefail # check if CONDA_HOME is set and create environment if [ -z "$CONDA_HOME" ] @@ -19,11 +19,13 @@ fi source ${CONDA_HOME}/etc/profile.d/conda.sh conda create -y -n ${ENV_NAME} python=3.9 conda activate ${ENV_NAME} +# python can't handle this dependency madness, switch to C++ +conda install -y -c conda-forge mamba # # Install dependencies and gxx -conda install -y "gxx<12.0" -c conda-forge -conda install -y -c conda-forge "rust>=1.65.0" -conda install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit +mamba install -y "gxx<12.0" -c conda-forge +mamba install -y -c conda-forge "rust>=1.65.0" +mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit # bring in the conda environment variables forward # (needed for proper linking) From 18c8a8a6614f36c8f81888967a6338a415f004b3 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 8 Aug 2023 13:02:37 -0400 Subject: [PATCH 06/40] update to support new flash attention code update build script --- Makefile | 3 + README.md | 2 +- server/Makefile-flash-att | 8 +- .../custom_modeling/flash_llama_modeling.py | 8 +- .../custom_modeling/flash_neox_modeling.py | 8 +- .../custom_modeling/flash_rw_modeling.py | 12 +-- .../flash_santacoder_modeling.py | 8 +- .../utils/flash_attn.py | 90 +++---------------- server/vllm_testscript.py | 2 +- setup_conda_nosudo.sh | 56 +++++++++--- 10 files changed, 87 insertions(+), 110 deletions(-) diff --git a/Makefile b/Makefile index 7f534c7ccd7..25515a828c1 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,9 @@ python-client-tests: python-tests: python-server-tests python-client-tests +run-open-llama-3b-v2: + text-generation-launcher --model-id openlm-research/open_llama_3b_v2 --port 8080 + run-falcon-7b-instruct: text-generation-launcher --model-id tiiuae/falcon-7b-instruct --port 8080 diff --git a/README.md b/README.md index a8f4ee818d1..281547ea154 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Forked from [HuggingFace](https://huggingface.co)'s [Text Generation Inference]( This fork was created mainly due to two reasons: 1. Primarily, it allows us faster iteration and more flexibility, which is essential for our research uses. It also allows more control over development and documentation, crucial for our in-house uses at CMU. -2. The fork also gives us the opportunity to maintain and update a fully open-source, commercial-friendly framework for fast LLM inference (the original goal of the project, and the intention of many contributors who helped before the re-licensing). +2. While we understand the reasons behind the re-licensing, we don't want our (research) contributions to be locked behind a restrictive license. 
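A quick way to sanity-check a running server (for instance one started with the `run-open-llama-3b-v2` target above, which listens on port 8080) is the Python client shipped in `clients/python`; a minimal sketch, assuming that client is installed and the server is reachable locally:

```python
from text_generation import Client

# e.g. a server started with `make run-open-llama-3b-v2`, which binds port 8080
client = Client("http://127.0.0.1:8080")

# one-shot generation
print(client.generate("CMU's PhD students are", max_new_tokens=20).generated_text)

# token-by-token streaming
text = ""
for response in client.generate_stream("CMU's PhD students are", max_new_tokens=20):
    if not response.token.special:
        text += response.token.text
print(text)
```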
## Table of contents diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att index dce53fdbcd5..aa48b9f9889 100644 --- a/server/Makefile-flash-att +++ b/server/Makefile-flash-att @@ -6,6 +6,10 @@ flash-attention: git clone git@github.com:Dao-AILab/flash-attention.git install-flash-attention: flash-attention - pip uninstall flash-attention -y || true + #pip uninstall flash-attention -y || true cd flash-attention && git fetch && git checkout $(flash_attention_commit) - cd flash-attention && pip install . \ No newline at end of file + cd flash-attention && pip install . && cd csrc/layer_norm && pip install . && cd ../rotary + +test-flash-attention: flash-attention + pip install pytest + cd flash-attention && pytest -q -s tests/test_flash_attn.py \ No newline at end of file diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index b6285856fc3..56af34ea46d 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -30,8 +30,8 @@ import dropout_layer_norm # vllm imports -import vllm_cache_ops -import vllm_attention_ops +import vllm.cache_ops +import vllm.attention_ops from text_generation_server.utils.flash_attn import attention from text_generation_server.utils.layers import ( @@ -247,7 +247,7 @@ def forward( self.rotary_emb(query, cos, sin) self.rotary_emb(torch.select(kv, dim=1, index=0), cos, sin) - vllm_cache_ops.reshape_and_cache( + vllm.cache_ops.reshape_and_cache( kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots ) @@ -270,7 +270,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm_attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.single_query_cached_kv_attention( attn_output, query, kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index e7c8ced4ca7..6dac3e26b6c 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -28,8 +28,8 @@ from typing import Optional, List, Tuple # vllm imports -import vllm_cache_ops -import vllm_attention_ops +import vllm.cache_ops +import vllm.attention_ops from text_generation_server.utils.flash_attn import attention from text_generation_server.utils.layers import ( @@ -141,7 +141,7 @@ def forward( self.rotary_emb(qkv[:, 0], cos, sin) self.rotary_emb(qkv[:, 1], cos, sin) - vllm_cache_ops.reshape_and_cache( + vllm.cache_ops.reshape_and_cache( qkv[:, 1], qkv[:, 2], kv_cache[0], kv_cache[1], slots ) @@ -164,7 +164,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm_attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.single_query_cached_kv_attention( attn_output, qkv[:, 0], kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py index 3570b283e59..0c371665f3b 100644 --- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py @@ -7,8 +7,8 @@ from typing import Optional, List, 
Tuple # vllm imports -import vllm_cache_ops -import vllm_attention_ops +import vllm.cache_ops +import vllm.attention_ops from text_generation_server.utils.flash_attn import attention from text_generation_server.utils.layers import ( @@ -191,7 +191,7 @@ def forward( self.rotary_emb(query, cos, sin) self.rotary_emb(torch.select(kv, dim=1, index=0), cos, sin) - vllm_cache_ops.reshape_and_cache( + vllm.cache_ops.reshape_and_cache( kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots ) @@ -214,7 +214,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads_kv, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm_attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.single_query_cached_kv_attention( attn_output, query, kv_cache[0], @@ -307,7 +307,7 @@ def forward( self.rotary_emb(query, cos, sin) self.rotary_emb(torch.select(kv, dim=2, index=0), cos, sin) - vllm_cache_ops.reshape_and_cache( + vllm.cache_ops.reshape_and_cache( kv[:, :, 0].contiguous(), kv[:, :, 1].contiguous(), kv_cache[0], @@ -334,7 +334,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_groups, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm_attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.single_query_cached_kv_attention( attn_output, query, kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py index 2dd0a5ee4e0..f7f93c4e1a0 100644 --- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py @@ -6,8 +6,8 @@ from typing import Optional, List, Tuple # vllm imports -import vllm_cache_ops -import vllm_attention_ops +import vllm.cache_ops +import vllm.attention_ops from text_generation_server.utils.flash_attn import attention from text_generation_server.utils.layers import ( @@ -258,7 +258,7 @@ def forward( query = query.view(-1, self.num_heads, self.head_size) key_value = key_value.view(-1, 2, 1, self.head_size) - vllm_cache_ops.reshape_and_cache( + vllm.cache_ops.reshape_and_cache( key_value[:, 0], key_value[:, 1], kv_cache[0], kv_cache[1], slots ) @@ -281,7 +281,7 @@ def forward( else: # kv_cache[1] => [num_blocks, 1, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm_attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.single_query_cached_kv_attention( attn_output, query, kv_cache[0], diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py index c472d1fceab..d61743c1991 100644 --- a/server/text_generation_server/utils/flash_attn.py +++ b/server/text_generation_server/utils/flash_attn.py @@ -15,39 +15,20 @@ is_sm90 = major == 9 and minor == 0 HAS_FLASH_ATTN = False -HAS_FLASH_ATTN_V2 = False try: - try: - import flash_attn_2_cuda - except ImportError: - raise ImportError( - "Flash Attention V2 is not installed.\n" - "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) " - "or install flash attention v2 with `cd server && make install install-flash-attention-v2`" - ) - if not (is_sm8x or is_sm90): - raise ImportError( - f"GPU with CUDA capability {major} {minor} is not supported for " - "Flash Attention V2" - ) - HAS_FLASH_ATTN_V2 = True + import flash_attn_2_cuda as flash_attn_cuda except ImportError as e: - try: - import flash_attn_cuda - except ImportError: - raise ImportError( - "Flash 
Attention is not installed.\n" - "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) " - "or install flash attention with `cd server && make install install-flash-attention`" - ) from e - - if not (is_sm75 or is_sm8x or is_sm90): - raise ImportError( - f"GPU with CUDA capability {major} {minor} is not supported" - ) from e - logger.warning(f"Unable to use Flash Attention V2: {e}") - HAS_FLASH_ATTN = True - + raise ImportError( + f"Flash Attention V2 is not installed.\n" + f"Error message: {e}\n" + "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) " + "or install flash attention v2 with `cd server && make install install-flash-attention-v2`" + ) +if not (is_sm8x or is_sm90): + raise ImportError( + f"GPU with CUDA capability {major} {minor} is not supported for " + "Flash Attention V2" + ) def attention( q, @@ -58,52 +39,8 @@ def attention( max_s, softmax_scale, ): - if HAS_FLASH_ATTN_V2: - return flash_attn_2_cuda.varlen_fwd( - q, - k, - v, - out, - cu_seqlens, - cu_seqlens, - max_s, - max_s, - 0.0, - softmax_scale, - False, - True, - False, - None, - ) - if HAS_FLASH_ATTN: - # Flash attention v1 requires q, k and v to have the same number of heads - if k.shape[1] != q.shape[1]: - # MQA expand - if k.shape[1] == 1: - k = k.expand(-1, q.shape[1], -1) - # Grouped attention reshape - else: - original_shape = k.shape - k = ( - k.unsqueeze(2) - .expand(-1, -1, q.shape[1] // k.shape[1], -1) - .reshape(original_shape[0], -1, original_shape[2]) - ) - if v.shape[1] != q.shape[1]: - # MQA expand - if v.shape[1] == 1: - v = v.expand(-1, q.shape[1], -1) - # Grouped attention reshape - else: - original_shape = v.shape - v = ( - v.unsqueeze(2) - .expand(-1, -1, q.shape[1] // v.shape[1], -1) - .reshape(original_shape[0], -1, original_shape[2]) - ) - - return flash_attn_cuda.fwd( + return flash_attn_cuda.varlen_fwd( q, k, v, @@ -117,7 +54,6 @@ def attention( False, True, False, - 0, None, ) diff --git a/server/vllm_testscript.py b/server/vllm_testscript.py index 667fd1f243f..71f61a7de93 100644 --- a/server/vllm_testscript.py +++ b/server/vllm_testscript.py @@ -8,7 +8,7 @@ ] sampling_params = vllm.SamplingParams(temperature=0.8, top_p=0.95) -llm = vllm.LLM(model="openlm-research/open_llama_13b") +llm = vllm.LLM(model="openlm-research/open_llama_3b_v2") # time the generation start = time.time() diff --git a/setup_conda_nosudo.sh b/setup_conda_nosudo.sh index 8eb13fb8117..796e41a3119 100644 --- a/setup_conda_nosudo.sh +++ b/setup_conda_nosudo.sh @@ -3,10 +3,13 @@ # It sidesteps system-wide installations by relying on conda for most packages # and by building openssl from source # TODO: only got it to work with a static build of OpenSSL, which is not ideal -ENV_NAME=tgi-venv-v3 +ENV_NAME=tgi-env DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -TEST_EXT=true N_THREADS=8 +# currently can only build in TIR with this set to false +# seems un-important, as it only affects BLOOM/NEOX +BUILD_EXTENSIONS=false +TEST_EXTRA=true set -eo pipefail @@ -22,17 +25,38 @@ conda activate ${ENV_NAME} # python can't handle this dependency madness, switch to C++ conda install -y -c conda-forge mamba +# remove possible extra cuda and gccs from path +# (not sure if needed, but added during debugging and kept for now) +export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') +export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') + # # 
Install dependencies and gxx mamba install -y "gxx<12.0" -c conda-forge +mamba install -y -c conda-forge curl mamba install -y -c conda-forge "rust>=1.65.0" mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit # bring in the conda environment variables forward -# (needed for proper linking) +# (not sure if needed, but added during debugging and kept for now) export LD_LIBRARY_PATH=${CONDA_HOME}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH export PATH=${CONDA_HOME}/envs/${ENV_NAME}/bin:$PATH export CUDA_HOME=${CONDA_HOME}/envs/${ENV_NAME} +# add protoc +export PROTOC_ZIP=protoc-21.12-linux-x86_64.zip +mkdir -p /tmp/protoc +mkdir -p ~/local/bin +mkdir -p ~/local/include +cd /tmp/protoc +curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP +unzip -o $PROTOC_ZIP -d ~/local/ bin/protoc +unzip -o $PROTOC_ZIP -d ~/local/ 'include/*' +cd $DIR +rm -rf /tmp/protoc + +export PATH=~/local/bin:$PATH +export LD_LIBRARY_PATH=~/local/lib:$LD_LIBRARY_PATH + # download and build openssl mkdir -p /tmp/openssl cd /tmp/openssl @@ -44,37 +68,47 @@ make -j $N_THREADS make install cd $DIR rm -rf /tmp/openssl - export LD_LIBRARY_PATH=${DIR}/.openssl/lib:$LD_LIBRARY_PATH export PATH=${DIR}/.openssl/bin:$PATH # install base package +cd ${DIR} OPENSSL_DIR=${DIR}/.openssl \ OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ -BUILD_EXTENSIONS=True \ - make -j $N_THREADS install +BUILD_EXTENSIONS=$BUILD_EXTENSIONS \ + make install-server -# # install ninja for faster compilation of CUDA kernels and setup workdir +# install ninja for faster compilation of CUDA kernels and setup workdir pip install ninja cd ${DIR}/server mkdir -p workdir +# install vllm cp Makefile-vllm workdir/Makefile cd workdir && sleep 1 make -j $N_THREADS install-vllm cd ${DIR}/server -if [ "$TEST_EXT" = true ] ; then +if [ "$TEST_EXTRA" = true ] ; then # run vllm_testscript.py and check if it works python3 vllm_testscript.py fi +rm -rf workdir/* # install flash attention +cd ${DIR}/server cp Makefile-flash-att workdir/Makefile cd workdir && sleep 1 -make -j $N_THREADS install-flash-attention +make -j $N_THREADS test-flash-attention cd ${DIR}/server rm -rf workdir -cd ${DIR} -make run-falcon-7b-instruct \ No newline at end of file +# # override protobuf +pip install 'protobuf<3.21' + +# # install python client +cd ${DIR}/clients/python +pip install . + +cd $DIR +make run-open-llama-3b-v2 \ No newline at end of file From 25559ace0f218d2644b406066dbb5d7eeffc36fd Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Wed, 9 Aug 2023 02:01:47 +0000 Subject: [PATCH 07/40] Further update code to fix dependency updates. Fix code with RotaryEmbeddings. Finish(?) 
no-sudo setup script --- Makefile | 7 +- benchmark/dump_fast_tokenizer.py | 19 +++++ notebooks/test_client.ipynb | 76 +++++++++++++++++ server/Makefile-flash-att | 11 ++- server/Makefile-vllm | 8 +- .../custom_modeling/flash_llama_modeling.py | 13 ++- .../utils/flash_attn.py | 2 + server/text_generation_server/utils/layers.py | 83 ++++++++++++++++--- server/vllm_testscript.py | 2 +- setup_conda_nosudo.sh | 48 ++++++++--- 10 files changed, 235 insertions(+), 34 deletions(-) create mode 100644 benchmark/dump_fast_tokenizer.py create mode 100644 notebooks/test_client.ipynb diff --git a/Makefile b/Makefile index 25515a828c1..41bed28e1d5 100644 --- a/Makefile +++ b/Makefile @@ -42,8 +42,11 @@ python-client-tests: python-tests: python-server-tests python-client-tests -run-open-llama-3b-v2: - text-generation-launcher --model-id openlm-research/open_llama_3b_v2 --port 8080 +run-llama2-benchmark: + text-generation-launcher --model-id lmsys/vicuna-7b-v1.5 + +run-llama2-vicuna-7b: + text-generation-launcher --model-id lmsys/vicuna-7b-v1.5 --port 8080 run-falcon-7b-instruct: text-generation-launcher --model-id tiiuae/falcon-7b-instruct --port 8080 diff --git a/benchmark/dump_fast_tokenizer.py b/benchmark/dump_fast_tokenizer.py new file mode 100644 index 00000000000..c2799b1faa6 --- /dev/null +++ b/benchmark/dump_fast_tokenizer.py @@ -0,0 +1,19 @@ +import os +import json +import argparse +from transformers import AutoTokenizer + +def dump_fast_tokenizer(tokenizer_name, output_path): + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + tokenizer.save_pretrained(output_path) + +def main(): + parser = argparse.ArgumentParser(description="Dump fast tokenizer json file") + parser.add_argument("--tokenizer-name", required=True, help="Name of the Hugging Face tokenizer") + parser.add_argument("--output", required=True, help="Output path for the fast tokenizer json file") + args = parser.parse_args() + + dump_fast_tokenizer(args.tokenizer_name, args.output) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/notebooks/test_client.ipynb b/notebooks/test_client.ipynb new file mode 100644 index 00000000000..8b5b95d6466 --- /dev/null +++ b/notebooks/test_client.ipynb @@ -0,0 +1,76 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import text_generation as tg_client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "IP=\"0.0.0.0\"\n", + "PORT=8080" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = tg_client.Client(f\"http://{IP}:{PORT}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(client.generate(\"CMU's PhD students are\", max_new_tokens=20).generated_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = \"\"\n", + "for response in client.generate_stream(\"CMU's PhD students are\", max_new_tokens=20):\n", + " if not response.token.special:\n", + " text += response.token.text\n", + "print(text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tgi-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att index aa48b9f9889..268a635a350 100644 --- a/server/Makefile-flash-att +++ b/server/Makefile-flash-att @@ -1,14 +1,17 @@ -flash_attention_commit := v1.0.9 +flash_attention_commit := v2.0.4 flash-attention: # Clone flash attention pip install packaging git clone git@github.com:Dao-AILab/flash-attention.git + cd flash-attention && git fetch && git checkout $(flash_attention_commit) install-flash-attention: flash-attention - #pip uninstall flash-attention -y || true - cd flash-attention && git fetch && git checkout $(flash_attention_commit) - cd flash-attention && pip install . && cd csrc/layer_norm && pip install . && cd ../rotary + pip uninstall flash-attention -y || true + cd flash-attention && pip install . + cd flash-attention/csrc/layer_norm && pip install . + cd flash-attention/csrc/rotary && pip install . + test-flash-attention: flash-attention pip install pytest diff --git a/server/Makefile-vllm b/server/Makefile-vllm index ce0c7e1ba19..25f0f2ce8f1 100644 --- a/server/Makefile-vllm +++ b/server/Makefile-vllm @@ -3,8 +3,12 @@ vllm_commit := "v0.1.2" vllm: # Clone vllm git clone https://github.com/vllm-project/vllm.git + cd vllm && git fetch && git checkout $(vllm_commit) install-vllm: vllm pip uninstall vllm -y || true - cd vllm && git fetch && git checkout $(vllm_commit) - cd vllm && pip install . \ No newline at end of file + cd vllm && pip install . + +test-vllm: vllm + pip install pytest + cd vllm && pytest -q -s tests/kernels/test_attention.py \ No newline at end of file diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index 56af34ea46d..e201c77faf4 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -185,10 +185,12 @@ def __init__( self.hidden_size = config.hidden_size self.head_size = self.hidden_size // self.num_heads - self.rotary_emb = PositionRotaryEmbedding.load( - prefix=f"{prefix}.rotary_emb", weights=weights + self.rotary_emb = PositionRotaryEmbedding.static( + config=config, + dim=self.head_size, + base=10000.0, + device=weights.device ) - self.softmax_scale = self.head_size**-0.5 if self.num_heads % weights.process_group.size() != 0: @@ -275,12 +277,15 @@ def forward( query, kv_cache[0], kv_cache[1], - self.kv_head_mapping, + # commented for now due to move to another vllm version + # not sure if it breaks anything + # self.kv_head_mapping, self.softmax_scale, block_tables, input_lengths, block_size, max_s, + None, ) return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size)) diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py index d61743c1991..6c8e1ca2cae 100644 --- a/server/text_generation_server/utils/flash_attn.py +++ b/server/text_generation_server/utils/flash_attn.py @@ -17,6 +17,8 @@ HAS_FLASH_ATTN = False try: import flash_attn_2_cuda as flash_attn_cuda + import flash_attn + HAS_FLASH_ATTN = True except ImportError as e: raise ImportError( f"Flash Attention V2 is not installed.\n" diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index 7a45808ec92..bae70f755ee 100644 --- 
a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -375,35 +375,68 @@ def forward(self, hidden_states, residual=None): try: from flash_attn.layers.rotary import RotaryEmbedding import rotary_emb + + def _create_inv_freq(dim, base, device): + inv_freq = 1.0 / ( + base + ** (torch.arange(0, dim, 2, device=device, dtype=torch.float32) / dim) + ) + return inv_freq + + def _get_rope_config(config): + if os.getenv("ROPE_SCALING", None) is not None: + rope_scaling = {"type": os.environ["ROPE_SCALING"], "factor": float(os.environ["ROPE_FACTOR"])} + return rope_scaling + return getattr(config, "rope_scaling", None) + class PositionRotaryEmbedding(nn.Module): - def __init__(self, inv_freq): + def __init__(self, inv_freq, scaling_factor): super().__init__() - self.inv_freq = inv_freq self._seq_len_cached = 0 self._cos_cached = None self._sin_cached = None self._cos_k_cached = None self._sin_k_cached = None + self.scaling_factor = scaling_factor + self.dynamic_args = None @classmethod - def static(cls, dim, base, device): - inv_freq = 1.0 / ( - base - ** (torch.arange(0, dim, 2, device=device, dtype=torch.float32) / dim) - ) - return cls(inv_freq) + def static(cls, config, dim, base, device): + inv_freq = _create_inv_freq(dim, base, device) + scaling_factor = None + rope_scaling = _get_rope_config(config) + if rope_scaling is not None: + scaling_factor = rope_scaling["factor"] + if rope_scaling["type"] == "linear": + pass + elif rope_scaling["type"] == "dynamic": + return DynamicPositionRotaryEmbedding(dim=dim, max_position_embeddings=config.max_position_embeddings, base=base, device=inv_freq.device, scaling_factor=scaling_factor) + else: + raise NotImplementedError(f"rope scaling type {rope_scaling['type']} is not implemented or invalid") + return cls(inv_freq, scaling_factor) @classmethod - def load(cls, prefix, weights): + def load(cls, config, prefix, weights): # XXX: Always load this in float32 ! 
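        # the loader's dtype is switched to float32 just below so the checkpoint's
        # inv_freq buffer is read at full precision, then the original dtype is
        # restored; the cos/sin cache is later built from this full-precision copy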
dtype = weights.dtype weights.dtype = torch.float32 inv_freq = weights.get_tensor(f"{prefix}.inv_freq") weights.dtype = dtype - return cls(inv_freq) + scaling_factor = None + rope_scaling = _get_rope_config(config) + if rope_scaling is not None: + scaling_factor = rope_scaling["factor"] + if rope_scaling["type"] == "linear": + pass + elif rope_scaling["type"] == "dynamic": + return DynamicPositionRotaryEmbedding(dim=2*inv_freq.shape[0], max_position_embeddings=config.max_position_embeddings, base=10000.0, device=inv_freq.device, scaling_factor=scaling_factor) + else: + raise NotImplementedError(f"rope scaling type {rope_scaling['type']} is not implemented or invalid") + return cls(inv_freq, scaling_factor) + def _update_cos_sin_cache(self, dtype, device, seqlen): # Reset the tables if the sequence length has changed, # or if we're on a new device (possibly due to tracing for instance) @@ -441,5 +474,35 @@ def forward(self, x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor): rotary_emb.apply_rotary(x1, x2, cos, sin, x1, x2, False) return x + class DynamicPositionRotaryEmbedding(PositionRotaryEmbedding): + def __init__(self, dim, max_position_embeddings, base, device, scaling_factor): + inv_freq = _create_inv_freq(dim, base, device) + super().__init__(inv_freq, scaling_factor) + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + + def _update_cos_sin_cache(self, dtype, device, seqlen): + # Reset the tables if the sequence length has changed, + # or if we're on a new device (possibly due to tracing for instance) + if ( + seqlen > self._seq_len_cached + or self._cos_cached.device != device + or self._cos_cached.dtype != dtype + ): + if seqlen > self.max_position_embeddings: + newbase = self.base * ((self.scaling_factor * seqlen / self.max_position_embeddings) - (self.scaling_factor - 1)) ** (self.dim / (self.dim - 2)) + self.inv_freq = _create_inv_freq(self.dim, newbase, self.inv_freq.device) + self._seq_len_cached = seqlen + t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype) + if self.scaling_factor is not None: + t /= self.scaling_factor + # Don't do einsum, it converts fp32 to fp16 + # freqs = torch.einsum("i,j->ij", t, self.inv_freq) + + freqs = torch.outer(t, self.inv_freq.to(device=t.device)) + self._cos_cached = torch.cos(freqs).to(dtype) + self._sin_cached = torch.sin(freqs).to(dtype) + except ImportError: pass diff --git a/server/vllm_testscript.py b/server/vllm_testscript.py index 71f61a7de93..eedbb75aca9 100644 --- a/server/vllm_testscript.py +++ b/server/vllm_testscript.py @@ -8,7 +8,7 @@ ] sampling_params = vllm.SamplingParams(temperature=0.8, top_p=0.95) -llm = vllm.LLM(model="openlm-research/open_llama_3b_v2") +llm = vllm.LLM(model="lmsys/vicuna-7b-v1.5") # time the generation start = time.time() diff --git a/setup_conda_nosudo.sh b/setup_conda_nosudo.sh index 796e41a3119..0c736cb2ff4 100644 --- a/setup_conda_nosudo.sh +++ b/setup_conda_nosudo.sh @@ -6,10 +6,12 @@ ENV_NAME=tgi-env DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" N_THREADS=8 -# currently can only build in TIR with this set to false +# currently can only build in TIR without extensions # seems un-important, as it only affects BLOOM/NEOX BUILD_EXTENSIONS=false TEST_EXTRA=true +BENCHMARK=true +SERVER_WAIT=180 set -eo pipefail @@ -30,9 +32,9 @@ conda install -y -c conda-forge mamba export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" 
"\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') -# # Install dependencies and gxx +# # Install dependencies mamba install -y "gxx<12.0" -c conda-forge -mamba install -y -c conda-forge curl +mamba install -y -c conda-forge curl git mamba install -y -c conda-forge "rust>=1.65.0" mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit @@ -53,9 +55,8 @@ unzip -o $PROTOC_ZIP -d ~/local/ bin/protoc unzip -o $PROTOC_ZIP -d ~/local/ 'include/*' cd $DIR rm -rf /tmp/protoc - -export PATH=~/local/bin:$PATH export LD_LIBRARY_PATH=~/local/lib:$LD_LIBRARY_PATH +export PATH=~/local/bin:$PATH # download and build openssl mkdir -p /tmp/openssl @@ -77,7 +78,7 @@ OPENSSL_DIR=${DIR}/.openssl \ OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ BUILD_EXTENSIONS=$BUILD_EXTENSIONS \ - make install-server + make install # install ninja for faster compilation of CUDA kernels and setup workdir pip install ninja @@ -85,12 +86,14 @@ cd ${DIR}/server mkdir -p workdir # install vllm +rm -rf workdir/* cp Makefile-vllm workdir/Makefile cd workdir && sleep 1 make -j $N_THREADS install-vllm +make test-vllm cd ${DIR}/server if [ "$TEST_EXTRA" = true ] ; then - # run vllm_testscript.py and check if it works + make test-vllm python3 vllm_testscript.py fi rm -rf workdir/* @@ -99,16 +102,39 @@ rm -rf workdir/* cd ${DIR}/server cp Makefile-flash-att workdir/Makefile cd workdir && sleep 1 -make -j $N_THREADS test-flash-attention +make -j $N_THREADS install-flash-attention +if [ "$TEST_EXTRA" = true ] ; then + make test-flash-attention +fi cd ${DIR}/server rm -rf workdir -# # override protobuf +# override protobuf pip install 'protobuf<3.21' # # install python client cd ${DIR}/clients/python pip install . -cd $DIR -make run-open-llama-3b-v2 \ No newline at end of file +# run a example server +if [ "$BENCHMARK" = true ] ; then + cd ${DIR} + # trap signal to avoid orphan server process + trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT + # launch server as background process, checking for errors + make run-llama2-benchmark & + # sleep to make sure server has time to boot + sleep $SERVER_WAIT + + OPENSSL_DIR=${DIR}/.openssl \ + OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ + OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ + make install-benchmark + python benchmark/dump_fast_tokenizer.py --tokenizer-name=lmsys/vicuna-7b-v1.5 --output=/tmp/vicuna-7b-v1.5/ + text-generation-benchmark --tokenizer-name=/tmp/vicuna-7b-v1.5 +fi + +# set default conda environment variables +conda env config vars set LD_LIBRARY_PATH=${LD_LIBRARY_PATH} +conda env config vars set PATH=${PATH} +conda env config vars set CUDA_HOME=${CUDA_HOME} \ No newline at end of file From 5c1b2abd97d6729b5a375c785f8aaa3ff555a0e2 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 04:11:04 +0000 Subject: [PATCH 08/40] Finish basic prototype of the central. 
Add integration with Chat-UI --- .gitmodules | 3 + Cargo.lock | 243 +++++++++++++++++- Cargo.toml | 9 +- Makefile | 10 + central/Cargo.toml | 24 ++ central/README.md | 24 ++ central/src/main.rs | 212 +++++++++++++++ chat-ui | 1 + clients/python/text_generation/client.py | 31 +++ launcher/Cargo.toml | 4 +- launcher/src/main.rs | 67 ++++- notebooks/test_client.ipynb | 84 +++++- setup_scripts/conda_client.sh | 34 +++ .../conda_server.sh | 15 +- 14 files changed, 735 insertions(+), 26 deletions(-) create mode 100644 .gitmodules create mode 100644 central/Cargo.toml create mode 100644 central/README.md create mode 100644 central/src/main.rs create mode 160000 chat-ui create mode 100644 setup_scripts/conda_client.sh rename setup_conda_nosudo.sh => setup_scripts/conda_server.sh (89%) diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..ae1eeb3c438 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "chat-ui"] + path = chat-ui + url = git@github.com:CoderPat/chat-ui.git diff --git a/Cargo.lock b/Cargo.lock index 8984ea6ad4b..576cee21d86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.3.2" @@ -398,6 +413,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "time 0.1.45", + "wasm-bindgen", + "winapi", +] + [[package]] name = "cipher" version = "0.4.4" @@ -991,7 +1021,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1057,6 +1087,31 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +[[package]] +name = "headers" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3e372db8e5c0d213e0cd0b9be18be2aca3d44cf2fe30a9d46a65581cd454584" +dependencies = [ + "base64 0.13.1", + "bitflags 1.3.2", + "bytes", + "headers-core", + "http", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" +dependencies = [ + "http", +] + [[package]] name = "heck" version = "0.4.1" @@ -1178,6 +1233,29 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + 
"windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -1546,7 +1624,7 @@ checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.48.0", ] @@ -1571,6 +1649,24 @@ dependencies = [ "syn 2.0.25", ] +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http", + "httparse", + "log", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "multimap" version = "0.8.3" @@ -1905,7 +2001,7 @@ dependencies = [ "once_cell", "pin-project-lite", "thiserror", - "urlencoding", + "urlencoding 2.1.2", ] [[package]] @@ -2195,7 +2291,7 @@ dependencies = [ "mach2", "once_cell", "raw-cpuid", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -2547,6 +2643,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.1.0" @@ -2911,6 +3013,23 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "text-generation-central" +version = "0.9.4" +dependencies = [ + "bytes", + "chrono", + "clap", + "reqwest", + "serde", + "serde_json", + "thiserror", + "tokio", + "urlencoding 1.3.3", + "warp", + "whoami", +] + [[package]] name = "text-generation-client" version = "0.9.4" @@ -2940,7 +3059,9 @@ dependencies = [ "serde_json", "tracing", "tracing-subscriber", + "urlencoding 1.3.3", "vergen", + "whoami", ] [[package]] @@ -3006,6 +3127,17 @@ dependencies = [ "once_cell", ] +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi", +] + [[package]] name = "time" version = "0.3.23" @@ -3159,6 +3291,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54319c93411147bced34cb5609a80e0a8e44c5999c93903a81cd866630ec0bfd" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite", +] + [[package]] name = "tokio-util" version = "0.7.8" @@ -3436,6 +3580,25 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +[[package]] +name = "tungstenite" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30ee6ab729cd4cf0fd55218530c4522ed30b7b6081752839b68fcec8d0960788" +dependencies = [ + "base64 0.13.1", + "byteorder", + "bytes", + "http", + "httparse", + "log", + "rand", + "sha1", + "thiserror", + "url", + "utf-8", +] + [[package]] name = "typenum" version = "1.16.0" @@ -3516,12 +3679,24 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "1.3.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a1f0175e03a0973cf4afd476bef05c26e228520400eb1fd473ad417b1c00ffb" + [[package]] name = "urlencoding" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8parse" version = "0.2.1" @@ -3591,7 +3766,7 @@ dependencies = [ "rustc_version", "rustversion", "sysinfo", - "time", + "time 0.3.23", ] [[package]] @@ -3619,6 +3794,43 @@ dependencies = [ "try-lock", ] +[[package]] +name = "warp" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba431ef570df1287f7f8b07e376491ad54f84d26ac473489427231e1718e1f69" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "headers", + "http", + "hyper", + "log", + "mime", + "mime_guess", + "multer", + "percent-encoding", + "pin-project", + "rustls-pemfile", + "scoped-tls", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-stream", + "tokio-tungstenite", + "tokio-util", + "tower-service", + "tracing", +] + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3722,6 +3934,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "whoami" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50" +dependencies = [ + "wasm-bindgen", + "web-sys", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3753,6 +3975,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.1", +] + [[package]] name = "windows-sys" version = "0.45.0" @@ -3919,7 +4150,7 @@ dependencies = [ "hmac", "pbkdf2", "sha1", - "time", + "time 0.3.23", "zstd", ] diff --git a/Cargo.toml b/Cargo.toml index 3bfe9831b9e..ba91c5de830 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,14 +4,15 @@ members = [ "router", "router/client", "router/grpc-metadata", + "central", "launcher" ] [workspace.package] -version = "0.9.4" -edition = "2021" -authors = ["Olivier Dehaene"] -homepage = "https://github.com/huggingface/text-generation-inference" +version = "0.9.5" +edition = "2023" +authors = ["Patrick Fernandes"] +homepage = "https://github.com/coderpat/text-generation-inference" [profile.release] debug = 1 diff --git a/Makefile b/Makefile index 41bed28e1d5..5fd978fa36a 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,9 @@ install-router: install-launcher: cd launcher && cargo install --path . +install-central: + cd central && cargo install --path . + install-benchmark: cd benchmark && cargo install --path . 
@@ -48,6 +51,13 @@ run-llama2-benchmark: run-llama2-vicuna-7b: text-generation-launcher --model-id lmsys/vicuna-7b-v1.5 --port 8080 +run-llama2-vicuna-7b-quantize: + text-generation-launcher --model-id lmsys/vicuna-7b-v1.5 --port 8080 --quantize bitsandbytes + +run-llama2-vicuna-13b: + text-generation-launcher --model-id lmsys/vicuna-13b-v1.5 --port 8081 \ + --master-port 29600 --shard-uds-path=/tmp/text-generation-server-2 + run-falcon-7b-instruct: text-generation-launcher --model-id tiiuae/falcon-7b-instruct --port 8080 diff --git a/central/Cargo.toml b/central/Cargo.toml new file mode 100644 index 00000000000..0b84813b8a1 --- /dev/null +++ b/central/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "text-generation-central" +description = "Text Generation Central controller tool" +version.workspace = true +edition.workspace = true +authors.workspace = true +homepage.workspace = true + +[[bin]] +name = "text-generation-central" +path = "src/main.rs" + +[dependencies] +clap = { version = "4.1.4", features = ["derive", "env"] } +warp = "0.3" +serde = {version = "1.0.142", features = ["derive"]} +serde_json = "1.0.93" +reqwest = { version = "0.11.14", features = [] } +tokio = { version = "1.25.0", features = ["full"] } +chrono = "0.4" +urlencoding = "1.1.1" +bytes = "1.1" +whoami = "1.4" + diff --git a/central/README.md b/central/README.md new file mode 100644 index 00000000000..3a27d73a1dc --- /dev/null +++ b/central/README.md @@ -0,0 +1,24 @@ +
+ +# Text Generation Central controller tool +
+ +A lightweight tool for tracking which models are running, who is running them, in what ip and port, etc. + +## Install + +From the root of the project run: + +```shell +make install-central +``` + +## Run + +To run the central controller tool, run: + +```shell +text-generation-central --port $PORT +``` + +TODO: Docs on environment variables \ No newline at end of file diff --git a/central/src/main.rs b/central/src/main.rs new file mode 100644 index 00000000000..df7b1ca7652 --- /dev/null +++ b/central/src/main.rs @@ -0,0 +1,212 @@ +use clap::Parser; +use warp::{Filter, Reply}; +use serde::{Serialize,Deserialize}; +use std::{sync::Arc, collections::HashMap}; +use tokio::sync::Mutex; + +/// App Configuration +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Args { + // The address the central uses to listen to server requests + #[clap(default_value = "0.0.0.0", long, env)] + hostname: String, + + // The port the central uses to listen to server requests + #[clap(default_value = "8086", long, env)] + port: u16, + + // The interval (in seconds) between central pings to models + #[clap(default_value = "60", long, env)] + ping_interval: u64, + + // By default is None, if set pings a server on launch and if alive registers it + #[clap(default_value = None, long, env)] + initial_ping: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ModelRecord { + pub name: String, + pub address: String, + pub owner: String, +} + +#[derive(Deserialize, Clone, Debug)] +struct ModelInfo { + docker_label: Option, + max_batch_total_tokens: u32, + max_best_of: u32, + max_concurrent_requests: u32, + max_input_length: u32, + max_stop_sequences: u32, + max_total_tokens: u32, + max_waiting_tokens: u32, + model_device_type: String, + model_dtype: String, + model_id: String, + model_pipeline_tag: String, + model_sha: String, + sha: String, + validation_workers: u32, + version: String, + waiting_served_ratio: f32, +} + +type Models = Arc>>; + +#[tokio::main] + +async fn main() -> Result<(), Box> { + let args = Args::parse(); + let hostname = args.hostname; + let port = args.port; + let ping_interval = args.ping_interval; + let initial_ping = args.initial_ping; + // get current user from env + let user = whoami::username(); + + let models: Models = Arc::new(Mutex::new(HashMap::new())); + + fn with_models(models: Models) -> impl Filter + Clone { + warp::any().map(move || models.clone()) + } + + async fn handle_model_notice(encoded_id: String, record: ModelRecord, models: Models) -> Result { + println!("Received model notice for {}", encoded_id); + let model_id = urlencoding::decode(&encoded_id).unwrap(); + models.lock().await.insert(model_id, record); + Ok(warp::reply::with_status( + "Model registered successfully", + warp::http::StatusCode::OK, + )) + } + + async fn handle_list_models(models: Models) -> Result { + let models = models.lock().await; + // print for debug + let mut models_list: Vec = vec![]; + for (_, record) in models.iter() { + models_list.push(record.clone()); + } + Ok(warp::reply::with_status( + warp::reply::json(&models_list).into_response(), + warp::http::StatusCode::OK, + )) + } + + let model_notice_route = warp::path("model_up") + .and(warp::path::param::()) + .and(warp::post()) + .and(warp::body::json()) + .and(with_models(models.clone())) + .and_then(handle_model_notice); + + let list_models_route = warp::path("list_models") + .and(warp::get()) + .and(with_models(models.clone())) + .and_then(handle_list_models); + + let catch_all = 
warp::any() + .map(||{ + println!("Warning: Received a request on an unhandled route"); + warp::reply::with_status( + "Unhandled route!", + warp::http::StatusCode::NOT_FOUND, + ) + }); + + let routes = model_notice_route + .or(list_models_route) + .or(catch_all); + + let listener = warp::serve(routes).run((hostname.parse::().unwrap(), port)); + let monitor = async { + // ping server if provided + if let Some(model_addr) = initial_ping { + // split server into ip and port variables strings + let model_ip = model_addr.split(":").collect::>()[0]; + let model_port = model_addr.split(":").collect::>()[1]; + + let url = format!("http://{}:{}/info", model_ip, model_port); + let response = reqwest::get(&url).await; + match response { + Ok(response) => { + if response.status().is_success() { + let body = response.text().await?; + let info: ModelInfo = serde_json::from_str(&body)?; + let address = format!("{}:{}", model_ip, model_port); + models.lock().await.insert( + info.model_id.clone(), + ModelRecord { + name: info.model_id.clone(), + address: address, + owner: user.to_string(), + }); + } else { + println!("Model not alive"); + } + }, + Err(e) => { + println!("Model not alive"); + } + }; + } + + // every Ns, for every model, ping in /health, and if not alive remove from models () + loop { + let mut models = models.lock().await; + let mut keys_removal: Vec = vec![]; + + for (model, record) in models.iter() { + let url = format!("http://{}/health", record.address); + let response = reqwest::get(&url).await; + match response { + Ok(response) => { + if !response.status().is_success() { + keys_removal.push(model.to_string()); + } + }, + Err(e) => { + keys_removal.push(model.to_string()); + } + } + }; + + let mut dropped_models: HashMap = HashMap::new(); + for key in keys_removal { + if let Some(record) = models.remove(&key) { + dropped_models.insert(key, record); + } + } + + // print current time + println!("------------------"); + println!("Current time: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S")); + // print models that stayed, one in each line + println!("Current Models:"); + for (model, record) in models.iter() { + println!("\t{} - {} by {}", model, record.address, record.owner); + } + // print dropped models + println!("Dropped Models:"); + for (model, record) in dropped_models.iter() { + println!("\t{} - {} by {}", model, record.address, record.owner); + } + + std::mem::drop(models); + tokio::time::sleep(std::time::Duration::from_secs(ping_interval)).await; + } + + Ok(()) as Result<(), Box> + }; + + // wrap listener to go into try join + let listener = async { + listener.await; + Ok(()) + }; + tokio::try_join!(listener, monitor); + Ok(()) +} + diff --git a/chat-ui b/chat-ui new file mode 160000 index 00000000000..fcf054a2e4c --- /dev/null +++ b/chat-ui @@ -0,0 +1 @@ +Subproject commit fcf054a2e4c1bf9e1bed8936ae0443f178fdca95 diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py index bf045d47735..c5f8166484d 100644 --- a/clients/python/text_generation/client.py +++ b/clients/python/text_generation/client.py @@ -1,5 +1,6 @@ import json import requests +import os from aiohttp import ClientSession, ClientTimeout from pydantic import ValidationError @@ -35,6 +36,36 @@ class Client: ``` """ + @classmethod + def list_from_central( + cls, + central_url: str = None, + ): + """ + Get the list of available models from the central model hub + + Args: + central_url (`str`): + Text Generation Central URL + + Returns: + List[Dict[str, str]]: List 
of available models + """ + if central_url is None: + # check if environment variable is set + if os.environ.get("TGI_CENTRAL_ADDRESS") is None: + raise ValueError( + "No Central url provided and TGI_CENTRAL_ADDRESS environment variable is not set" + ) + central_url = f"http://{os.environ.get(/'TGI_CENTRAL_ADDRESS')}" + + # query from /models endpoint + resp = requests.get(f"{central_url}/list_models") + payload = resp.json() + if resp.status_code != 200: + raise parse_error(resp.status_code, payload) + return payload + def __init__( self, base_url: str, diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml index 3e7f86d4e6f..4c47f5affe1 100644 --- a/launcher/Cargo.toml +++ b/launcher/Cargo.toml @@ -10,14 +10,16 @@ homepage.workspace = true clap = { version = "4.1.4", features = ["derive", "env"] } ctrlc = { version = "3.2.5", features = ["termination"] } nix = "0.26.2" +reqwest = { version = "0.11.14", features = ["blocking", "json"] } serde = { version = "1.0.152", features = ["derive"] } serde_json = "1.0.93" tracing = "0.1.37" tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] } +urlencoding = "1.1.1" +whoami = "1.4.0" [dev-dependencies] float_eq = "1.0.1" -reqwest = { version = "0.11.14", features = ["blocking", "json"] } [build-dependencies] vergen = { version = "8.0.0", features = ["build", "cargo", "git", "gitcl", "rustc", "si"] } diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 2ad788a405b..d568a6f1aa3 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -16,6 +16,8 @@ use std::thread::sleep; use std::time::{Duration, Instant}; use std::{fs, io}; use tracing_subscriber::EnvFilter; +use serde::Serialize; +use serde_json::json; mod env_runtime; @@ -25,6 +27,14 @@ enum Quantization { Gptq, } +#[derive(Serialize)] +struct ModelRecord { + name: String, + address: String, + owner: String, +} + + impl std::fmt::Display for Quantization { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // To keep in track with `server`. 
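
The `ModelRecord` struct introduced in this hunk is the JSON payload a launcher POSTs to the central when its server comes up; the central serves the same records back from `GET /list_models` and keeps polling each registered address on `/health`, dropping entries that stop responding. A minimal Python sketch of that exchange, assuming a central on `localhost:8086` (its default port) and a server reachable at `my-node:8080` (both placeholder addresses), with `requests` standing in for the launcher's `reqwest` calls:

```python
# Sketch only: mirrors the launcher's registration call and the central's
# /list_models endpoint. Addresses below are placeholders for your cluster.
import getpass
import urllib.parse

import requests

CENTRAL = "http://localhost:8086/"        # central's default port is 8086
MODEL_ID = "lmsys/vicuna-7b-v1.5"
SERVER_ADDRESS = "my-node:8080"          # hostname:port of the running TGI server

record = {
    "name": MODEL_ID,
    "address": SERVER_ADDRESS,
    "owner": getpass.getuser(),
}

# Register the server: POST /model_up/<url-encoded model id> with the record as JSON
encoded_id = urllib.parse.quote(MODEL_ID, safe="")
resp = requests.post(f"{CENTRAL}/model_up/{encoded_id}", json=record)
resp.raise_for_status()

# Discover running servers: GET /list_models returns the registered records
for model in requests.get(f"{CENTRAL}/list_models").json():
    print(model["name"], model["address"], model["owner"])

# The central keeps entries alive by polling GET http://<address>/health every
# `ping_interval` seconds and dropping servers that stop answering.
```

A later patch in this series extends the record with an `is_quantized` flag, so treat the exact field set as version-dependent.
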
@@ -276,6 +286,10 @@ struct Args { #[clap(long, env)] ngrok_edge: Option, + /// Central address, used to register the server in the central registry + #[clap(long, env)] + central_address: Option, + /// Display a lot of information about your runtime environment #[clap(long, short, action)] env: bool, @@ -1083,11 +1097,6 @@ fn main() -> Result<(), LauncherError> { // Download and convert model weights download_convert_model(&args, running.clone())?; - if !running.load(Ordering::SeqCst) { - // Launcher was asked to stop - return Ok(()); - } - // Shared shutdown bool let shutdown = Arc::new(AtomicBool::new(false)); // Shared shutdown channel @@ -1097,6 +1106,23 @@ fn main() -> Result<(), LauncherError> { // Shared channel to track shard status let (status_sender, status_receiver) = mpsc::channel(); + // clone args to avoid borrowing issues + // if central_address is None, check if enviroment variable is set + let central_address = match args.central_address.clone() { + Some(central_address) => Some(central_address), + None => match env::var("TGI_CENTRAL_ADDRESS") { + Ok(central_address) => Some(central_address), + Err(_) => None, + }, + }; + let encoded_id = urlencoding::encode(&args.model_id); + let model_record = ModelRecord { + name: args.model_id.clone(), + // build address string with hostnmae and port + address: format!("{}:{}", args.hostname, args.port), + owner: whoami::username() + }; + spawn_shards( num_shard, &args, @@ -1123,6 +1149,37 @@ fn main() -> Result<(), LauncherError> { // Default exit code let mut exit_code = Ok(()); + // Ping central server to register model, using request + if let Some(central_address) = central_address { + println!("Attempting to register in Central at {}", central_address); + let url = format!("http://{}/model_up/%7B%7D", central_address, encoded_id.to_string()); + let client = reqwest::blocking::Client::new(); + let res = client + .post(&url) + .json(&model_record) + .send(); + + match res { + Ok(response) => { + if response.status().is_success() { + println!("Successfully registered on central server"); + } else { + println!("Failed to register on central server"); + // response + println!("Response: {:?}", response); + } + }, + Err(e) => println!("Error occurred while initiating connection with central server: {}", e) + } + } else { + println!("No central server address provided. Skipping registration"); + } + + if !running.load(Ordering::SeqCst) { + // Launcher was asked to stop + return Ok(()); + } + while running.load(Ordering::SeqCst) { if let Ok(ShardStatus::Failed(rank)) = status_receiver.try_recv() { tracing::error!("Shard {rank} crashed"); diff --git a/notebooks/test_client.ipynb b/notebooks/test_client.ipynb index 8b5b95d6466..e4c24b3cd2b 100644 --- a/notebooks/test_client.ipynb +++ b/notebooks/test_client.ipynb @@ -2,37 +2,95 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/media/hdd1/patrick/miniconda3/envs/tgi-env-v2/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import text_generation as tg" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "import text_generation as tg_client" + "# set environment variable\n", + "import os\n", + "os.environ['TGI_CENTRAL_ADDRESS'] = '0.0.0.0:8765'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'tg' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m servers \u001b[39m=\u001b[39m tg\u001b[39m.\u001b[39mClient\u001b[39m.\u001b[39mlist_from_central()\n\u001b[1;32m 2\u001b[0m \u001b[39mprint\u001b[39m(servers)\n", + "\u001b[0;31mNameError\u001b[0m: name 'tg' is not defined" + ] + } + ], + "source": [ + "servers = tg.Client.list_from_central()\n", + "print(servers)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "IP=\"0.0.0.0\"\n", - "PORT=8080" + "first_server_addr = servers[0]['address']" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "client = tg_client.Client(f\"http://{IP}:{PORT}\")" + "client = tg.Client(f\"http://{first_server_addr}/")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], + "source": [ + "client = tg.Client(\"http://0.0.0.0:8080/")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "expected to be self-motivated and to work independently. nobody is going to hold your hand\n" + ] + } + ], "source": [ "print(client.generate(\"CMU's PhD students are\", max_new_tokens=20).generated_text)" ] @@ -41,7 +99,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " expected to be self-motivated and to work independently. nobody is going to hold your hand\n" + ] + } + ], "source": [ "text = \"\"\n", "for response in client.generate_stream(\"CMU's PhD students are\", max_new_tokens=20):\n", diff --git a/setup_scripts/conda_client.sh b/setup_scripts/conda_client.sh new file mode 100644 index 00000000000..8bc3bee87d1 --- /dev/null +++ b/setup_scripts/conda_client.sh @@ -0,0 +1,34 @@ +ENV_NAME=tgi-env-client +# get the directory of this script, and go one up to get the root directory +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +DIR="$(dirname "$DIR")" +N_THREADS=8 +INSTALL_CHATUI=true + +set -eo pipefail + +# check if CONDA_HOME is set and create environment +if [ -z "$CONDA_HOME" ] +then + echo "Please set CONDA_HOME to the location of your conda installation" + exit 1 +fi + +source ${CONDA_HOME}/etc/profile.d/conda.sh +conda create -y -n ${ENV_NAME} python=3.9 +conda activate ${ENV_NAME} +conda install -y -c conda-forge mamba + +# install client +cd ${DIR}/clients/python +pip install . 
+ +echo $PATH +echo $LD_LIBRARY_PATH + +if [ "$INSTALL_CHATUI" = true ] ; then + # install chat-ui + cd ${DIR}/chat-ui + mamba install -y -c conda-forge mongodb pymongo "nodejs>=18" + npm install +fi \ No newline at end of file diff --git a/setup_conda_nosudo.sh b/setup_scripts/conda_server.sh similarity index 89% rename from setup_conda_nosudo.sh rename to setup_scripts/conda_server.sh index 0c736cb2ff4..0aa687d7a29 100644 --- a/setup_conda_nosudo.sh +++ b/setup_scripts/conda_server.sh @@ -3,9 +3,12 @@ # It sidesteps system-wide installations by relying on conda for most packages # and by building openssl from source # TODO: only got it to work with a static build of OpenSSL, which is not ideal -ENV_NAME=tgi-env +ENV_NAME=tgi-env-v2 +# get the directory of this script, and go one up to get the root directory DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +DIR="$(dirname "$DIR")" N_THREADS=8 + # currently can only build in TIR without extensions # seems un-important, as it only affects BLOOM/NEOX BUILD_EXTENSIONS=false @@ -27,6 +30,16 @@ conda activate ${ENV_NAME} # python can't handle this dependency madness, switch to C++ conda install -y -c conda-forge mamba +# check if `module` is available and unload gcc and cuda modules +if [ -x "$(command -v module)" ] +then + # get list of loaded modules, grep for gcc and unload all gcc modules found + # TODO: Fix this, it's not working + # For now, unload manually + # module list | grep gcc | sed 's/ //g' | sed 's/(gcc)//g' | xargs -I{} module unload {} + # module unload "cuda*" +fi + # remove possible extra cuda and gccs from path # (not sure if needed, but added during debugging and kept for now) export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') From b4de25656d717615f60e307f81e26e298db79777 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 04:35:29 +0000 Subject: [PATCH 09/40] Update README.md --- README.md | 70 +++++++++++++++++++++++++---------- setup_scripts/conda_server.sh | 2 +- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 281547ea154..5e3eeb754a0 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,10 @@ Forked from [HuggingFace](https://huggingface.co)'s [Text Generation Inference]( This fork was created mainly due to two reasons: 1. Primarily, it allows us faster iteration and more flexibility, which is essential for our research uses. It also allows more control over development and documentation, crucial for our in-house uses at CMU. -2. While we understand the reasons behind the re-licensing, we don't want our (research) contributions to be locked behind a restrictive license. +2. While we understand the reasons behind the re-licensing, we don't want our (research) contributions to be locked behind a restrictive license. This fork will not sync with the upstream repository, and will be updated independently. + +*For contributors*: If HuggingFace's upstream has a feature that you want to use, please open an issue first and discuss porting the functionality independently. +Do not just copy the code over, as it will be rejected. ## Table of contents @@ -86,13 +89,57 @@ or ### *For LTI/cluster users* -If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. 
Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: +If you are new to this library, as it has been already being used in your cluster, we recommend by starting with a *client-only* installation. +For example, to install the python client in a new conda environment, run: + +```shell +cd clients/python +pip install . +``` + +This will install the python client. You can then query the API to list the models available in your cluster, and use models for inference. + +```python +from text_generation import Client + +# get current models +models = Client.list_from_central() +model_addr = models[0]["address"] + +client = Client(model_addr) +print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) + +text = "" +for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20): + if not response.token.special: + text += response.token.text +print(text) +``` + +#### Running your own servers + +If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. + +Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: ```shell -CONDA_HOME=/path/to/conda -bash setup_conda_nosudo.sh +export CONDA_HOME=/path/to/conda +bash setup_scripts/conda_server.sh ``` +This will create a conda environment with all the dependencies needed to run the model servers. + +You should then be able to launch models with the `text-generation-launcher` command, or by using one of the predefined MAKE rules +```shell +conda activate tgi-env +make run-llama2-vicuna-7b +``` + +#### Setting up a Central server + +#### Chat-UI + + ### Docker The easiest way of getting started is using the official Docker container: @@ -128,22 +175,7 @@ curl 127.0.0.1:8080/generate_stream \ or from Python: -```shell -pip install text-generation -``` - -```python -from text_generation import Client -client = Client("http://127.0.0.1:8080/") -print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) - -text = "" -for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20): - if not response.token.special: - text += response.token.text -print(text) -``` ### API documentation diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 0aa687d7a29..f9b217c2dc1 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -3,7 +3,7 @@ # It sidesteps system-wide installations by relying on conda for most packages # and by building openssl from source # TODO: only got it to work with a static build of OpenSSL, which is not ideal -ENV_NAME=tgi-env-v2 +ENV_NAME=tgi-env # get the directory of this script, and go one up to get the root directory DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" DIR="$(dirname "$DIR")" From e5a9b648ce4ef71e36efa9fec054828d62caf6aa Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 11:56:27 -0400 Subject: [PATCH 10/40] update central/launcher to pass quantization info --- central/src/main.rs | 15 +++++++++++++-- launcher/src/main.rs | 6 +++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/central/src/main.rs b/central/src/main.rs index df7b1ca7652..b3c51b9ce6c 100644 --- a/central/src/main.rs +++ 
b/central/src/main.rs @@ -30,6 +30,7 @@ pub struct ModelRecord { pub name: String, pub address: String, pub owner: String, + pub is_quantized: bool, } #[derive(Deserialize, Clone, Debug)] @@ -55,6 +56,16 @@ struct ModelInfo { type Models = Arc>>; + +// define function to print model info +fn printModelRecord(model: &ModelRecord) { + if record.is_quantized { + println!("\t{} (quant) - {} by {}", record.name, record.address, record.owner); + } else { + println!("\t{} - {} by {}", record.name, record.address, record.owner); + } +} + #[tokio::main] async fn main() -> Result<(), Box> { @@ -186,12 +197,12 @@ async fn main() -> Result<(), Box> { // print models that stayed, one in each line println!("Current Models:"); for (model, record) in models.iter() { - println!("\t{} - {} by {}", model, record.address, record.owner); + printModelRecord(record); } // print dropped models println!("Dropped Models:"); for (model, record) in dropped_models.iter() { - println!("\t{} - {} by {}", model, record.address, record.owner); + printModelRecord(record); } std::mem::drop(models); diff --git a/launcher/src/main.rs b/launcher/src/main.rs index d568a6f1aa3..3b1ccb4e610 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -32,6 +32,7 @@ struct ModelRecord { name: String, address: String, owner: String, + is_quantized: bool, } @@ -356,9 +357,11 @@ fn shard_manager( shard_args.push("--sharded".to_string()); } + mut is_quantized = false; if let Some(quantize) = quantize { shard_args.push("--quantize".to_string()); shard_args.push(quantize.to_string()) + is_quantized = true; } if let Some(dtype) = dtype { @@ -1120,7 +1123,8 @@ fn main() -> Result<(), LauncherError> { name: args.model_id.clone(), // build address string with hostnmae and port address: format!("{}:{}", args.hostname, args.port), - owner: whoami::username() + owner: whoami::username(), + is_quantized: is_quantized }; spawn_shards( From 468101a211aafed60b78985f49f38255a7060f84 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 17:25:20 +0000 Subject: [PATCH 11/40] Fix code for quantization info in the Central --- Cargo.lock | 11 +++++------ Cargo.toml | 2 +- Makefile | 12 ++++++++++-- central/src/main.rs | 9 ++++++--- chat-ui | 2 +- launcher/src/main.rs | 4 +--- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 576cee21d86..c2866b114fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2995,7 +2995,7 @@ dependencies = [ [[package]] name = "text-generation-benchmark" -version = "0.9.4" +version = "0.9.5" dependencies = [ "average", "clap", @@ -3015,7 +3015,7 @@ dependencies = [ [[package]] name = "text-generation-central" -version = "0.9.4" +version = "0.9.5" dependencies = [ "bytes", "chrono", @@ -3023,7 +3023,6 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror", "tokio", "urlencoding 1.3.3", "warp", @@ -3032,7 +3031,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.9.4" +version = "0.9.5" dependencies = [ "futures", "grpc-metadata", @@ -3048,7 +3047,7 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "0.9.4" +version = "0.9.5" dependencies = [ "clap", "ctrlc", @@ -3066,7 +3065,7 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "0.9.4" +version = "0.9.5" dependencies = [ "async-stream", "axum", diff --git a/Cargo.toml b/Cargo.toml index ba91c5de830..886bb06c188 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ [workspace.package] version = "0.9.5" -edition = 
"2023" +edition = "2021" authors = ["Patrick Fernandes"] homepage = "https://github.com/coderpat/text-generation-inference" diff --git a/Makefile b/Makefile index 5fd978fa36a..b5811af681a 100644 --- a/Makefile +++ b/Makefile @@ -55,8 +55,16 @@ run-llama2-vicuna-7b-quantize: text-generation-launcher --model-id lmsys/vicuna-7b-v1.5 --port 8080 --quantize bitsandbytes run-llama2-vicuna-13b: - text-generation-launcher --model-id lmsys/vicuna-13b-v1.5 --port 8081 \ - --master-port 29600 --shard-uds-path=/tmp/text-generation-server-2 + text-generation-launcher --model-id lmsys/vicuna-13b-v1.5 --port 8080 + +run-llama2-vicuna-13b-quantize: + text-generation-launcher --model-id lmsys/vicuna-13b-v1.5 --port 8080 --quantize bitsandbytes + +run-llama2-vicuna-33b-quantize: + text-generation-launcher --model-id lmsys/vicuna-33b-v1.3 --port 8080 --quantize bitsandbytes + +run-llama2-70b-instruct-quantize: + text-generation-launcher --model-id upstage/Llama-2-70b-instruct-v2 --port 8080 --quantize bitsandbytes run-falcon-7b-instruct: text-generation-launcher --model-id tiiuae/falcon-7b-instruct --port 8080 diff --git a/central/src/main.rs b/central/src/main.rs index b3c51b9ce6c..a1756188517 100644 --- a/central/src/main.rs +++ b/central/src/main.rs @@ -58,7 +58,7 @@ type Models = Arc>>; // define function to print model info -fn printModelRecord(model: &ModelRecord) { +fn print_model_record(record: &ModelRecord) { if record.is_quantized { println!("\t{} (quant) - {} by {}", record.name, record.address, record.owner); } else { @@ -149,10 +149,13 @@ async fn main() -> Result<(), Box> { let address = format!("{}:{}", model_ip, model_port); models.lock().await.insert( info.model_id.clone(), + // TODO: this is not the correct values + // we should get these from the model ModelRecord { name: info.model_id.clone(), address: address, owner: user.to_string(), + is_quantized: false, }); } else { println!("Model not alive"); @@ -197,12 +200,12 @@ async fn main() -> Result<(), Box> { // print models that stayed, one in each line println!("Current Models:"); for (model, record) in models.iter() { - printModelRecord(record); + print_model_record(record); } // print dropped models println!("Dropped Models:"); for (model, record) in dropped_models.iter() { - printModelRecord(record); + print_model_record(record); } std::mem::drop(models); diff --git a/chat-ui b/chat-ui index fcf054a2e4c..3c70ef3db85 160000 --- a/chat-ui +++ b/chat-ui @@ -1 +1 @@ -Subproject commit fcf054a2e4c1bf9e1bed8936ae0443f178fdca95 +Subproject commit 3c70ef3db85b105717e5b1c8d86979cb35e2f4fd diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 3b1ccb4e610..8f971d76a52 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -357,11 +357,9 @@ fn shard_manager( shard_args.push("--sharded".to_string()); } - mut is_quantized = false; if let Some(quantize) = quantize { shard_args.push("--quantize".to_string()); shard_args.push(quantize.to_string()) - is_quantized = true; } if let Some(dtype) = dtype { @@ -1124,7 +1122,7 @@ fn main() -> Result<(), LauncherError> { // build address string with hostnmae and port address: format!("{}:{}", args.hostname, args.port), owner: whoami::username(), - is_quantized: is_quantized + is_quantized: args.quantize.is_some() }; spawn_shards( From c1ed61cb936f83af3d639248c57a6e1b88a61723 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 17:27:20 +0000 Subject: [PATCH 12/40] make central build by default --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/Makefile b/Makefile index b5811af681a..458508d66f7 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ install-central: install-benchmark: cd benchmark && cargo install --path . -install: install-server install-router install-launcher install-custom-kernels +install: install-server install-router install-launcher install-central install-custom-kernels server-dev: cd server && make run-dev From 3b362eeb85c0eee9649159f969a24e0ff70b8c9c Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 23:07:04 +0000 Subject: [PATCH 13/40] Update vLLM to newer commit to support llama2 --- chat-ui | 2 +- server/Makefile-vllm | 2 +- .../models/custom_modeling/flash_llama_modeling.py | 9 ++++----- server/text_generation_server/models/flash_llama.py | 6 ++++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/chat-ui b/chat-ui index 3c70ef3db85..a2954720286 160000 --- a/chat-ui +++ b/chat-ui @@ -1 +1 @@ -Subproject commit 3c70ef3db85b105717e5b1c8d86979cb35e2f4fd +Subproject commit a29547202860dfc2518f7938d56628fe5ef4b45b diff --git a/server/Makefile-vllm b/server/Makefile-vllm index 25f0f2ce8f1..a223a45546b 100644 --- a/server/Makefile-vllm +++ b/server/Makefile-vllm @@ -1,4 +1,4 @@ -vllm_commit := "v0.1.2" +vllm_commit := "96853af" vllm: # Clone vllm diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index e201c77faf4..cd87baededb 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -277,15 +277,13 @@ def forward( query, kv_cache[0], kv_cache[1], - # commented for now due to move to another vllm version - # not sure if it breaks anything - # self.kv_head_mapping, + self.kv_head_mapping, self.softmax_scale, block_tables, input_lengths, block_size, max_s, - None, + None ) return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size)) @@ -388,6 +386,7 @@ def forward( class FlashLlamaModel(torch.nn.Module): def __init__(self, config, weights): super().__init__() + self.config = config process_group = weights.process_group self.tp_rank = process_group.rank() @@ -490,4 +489,4 @@ def forward( if lm_head_indices is not None: hidden_states = hidden_states[lm_head_indices] logits = self.lm_head(hidden_states) - return logits + return logits \ No newline at end of file diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py index 96fb0c266cb..f725a627c6b 100644 --- a/server/text_generation_server/models/flash_llama.py +++ b/server/text_generation_server/models/flash_llama.py @@ -2,7 +2,8 @@ import torch.distributed from opentelemetry import trace -from transformers.models.llama import LlamaTokenizer, LlamaTokenizerFast +from transformers import AutoTokenizer +from transformers.models.llama import LlamaTokenizer from typing import Optional from text_generation_server.models import FlashCausalLM @@ -44,7 +45,8 @@ def __init__( trust_remote_code=trust_remote_code, ) except Exception: - tokenizer = LlamaTokenizerFast.from_pretrained( + # use AutoTokenizer as fallback in case it's a costum tokenizer + tokenizer = AutoTokenizer.from_pretrained( model_id, revision=revision, padding_side="left", From 8f35f11cdeb079734297c9b416c217f21ac9fd4e Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 18:36:55 -0400 Subject: [PATCH 14/40] Update README --- README.md 
| 137 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 5e3eeb754a0..079effbf6d7 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,74 @@ This fork was created mainly due to two reasons: *For contributors*: If HuggingFace's upstream has a feature that you want to use, please open an issue first and discuss porting the functionality independently. Do not just copy the code over, as it will be rejected. +### *For LTI/cluster users* + +#### Getting started + +If you are new to using this library, and as it has being used in your cluster, we recommend by starting with a *client-only* installation, and using models launched by other users. + +To start, the `TGI_CENTRAL_ADDRESS` needs to be set, so that the client can know which servers to connect to. For example, in the LTI cluster, run + +```shell +echo "export TGI_CENTRAL_ADDRESS=tir-1-11:8765" >> ~/.bashrc +source ~/.bashrc +``` + +To use the python client, install it with + +```shell +cd clients/python +pip install . +``` + +You can then query the API to list the models available in your cluster, and use models for inference. + +```python +from text_generation import Client + +# get current models +models = Client.list_from_central() +model_addr = models[0]["address"] + +client = Client(model_addr) +print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) +``` + +#### Running your own servers + +If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. + +Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: + +```shell +export CONDA_HOME=/path/to/conda +bash setup_scripts/conda_server.sh +``` + +This will create a conda environment with all the dependencies needed to run the model servers. + +You should then be able to launch models with the `text-generation-launcher` command, or by using one of the predefined MAKE rules +```shell +conda activate tgi-env +make run-llama2-vicuna-7b +``` + +#### Setting up a Central server + +If you are setting this library for use in your group/cluster for the first time, you will need (or at least benefit) from setting up a central server. +See the instructions [in the package folder](./central/README.md). + +Remember to set the `TGI_CENTRAL_ADDRESS` environment variable (ideally for all the users in your cluster) to the address of the central server. + +#### Chat-UI + +It is also possible to use the [chat-ui](./clients/chat-ui) to interact with the models. +This is a simple fork of [HuggingFace's Chat UI](https://github.com/huggingface/chat-ui) that communicates with the central controller to get the list of models available in the cluster, and then connects to the corresponding servers to generate text. +Check the [README](./clients/chat-ui/README.md) for more details. + +**Content below is from the original README.** +--- + ## Table of contents - [Features](#features) @@ -87,59 +155,6 @@ or ## Get started -### *For LTI/cluster users* - -If you are new to this library, as it has been already being used in your cluster, we recommend by starting with a *client-only* installation. -For example, to install the python client in a new conda environment, run: - -```shell -cd clients/python -pip install . 
-``` - -This will install the python client. You can then query the API to list the models available in your cluster, and use models for inference. - -```python -from text_generation import Client - -# get current models -models = Client.list_from_central() -model_addr = models[0]["address"] - -client = Client(model_addr) -print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) - -text = "" -for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20): - if not response.token.special: - text += response.token.text -print(text) -``` - -#### Running your own servers - -If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. - -Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: - -```shell -export CONDA_HOME=/path/to/conda -bash setup_scripts/conda_server.sh -``` - -This will create a conda environment with all the dependencies needed to run the model servers. - -You should then be able to launch models with the `text-generation-launcher` command, or by using one of the predefined MAKE rules -```shell -conda activate tgi-env -make run-llama2-vicuna-7b -``` - -#### Setting up a Central server - -#### Chat-UI - - ### Docker The easiest way of getting started is using the official Docker container: @@ -175,6 +190,22 @@ curl 127.0.0.1:8080/generate_stream \ or from Python: +```shell +pip install text-generation +``` + +```python +from text_generation import Client + +client = Client("http://127.0.0.1:8080/") +print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) + +text = "" +for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20): + if not response.token.special: + text += response.token.text +print(text) +``` ### API documentation From 3a2b24fe29b5d74441e95e0e4cfbe151e59985d6 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 14 Aug 2023 23:37:53 -0400 Subject: [PATCH 15/40] Fix issue with hostnames in local networks --- README.md | 2 +- chat-ui | 2 +- launcher/Cargo.toml | 1 + launcher/src/main.rs | 12 +++++++++++- router/src/validation.rs | 2 +- setup_scripts/conda_server.sh | 17 +++++++++-------- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 079effbf6d7..8271abaa9cb 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ If you are new to using this library, and as it has being used in your cluster, To start, the `TGI_CENTRAL_ADDRESS` needs to be set, so that the client can know which servers to connect to. 
For example, in the LTI cluster, run ```shell -echo "export TGI_CENTRAL_ADDRESS=tir-1-11:8765" >> ~/.bashrc +echo "export TGI_CENTRAL_ADDRESS=tir-0-32:8765" >> ~/.bashrc source ~/.bashrc ``` diff --git a/chat-ui b/chat-ui index a2954720286..f65ca708ec2 160000 --- a/chat-ui +++ b/chat-ui @@ -1 +1 @@ -Subproject commit a29547202860dfc2518f7938d56628fe5ef4b45b +Subproject commit f65ca708ec2d018fee108a6edf5e48f545d4032f diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml index 4c47f5affe1..aebf8e7e5d9 100644 --- a/launcher/Cargo.toml +++ b/launcher/Cargo.toml @@ -17,6 +17,7 @@ tracing = "0.1.37" tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] } urlencoding = "1.1.1" whoami = "1.4.0" +hostname = "0.3" [dev-dependencies] float_eq = "1.0.1" diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 8f971d76a52..28bdddb5233 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1117,10 +1117,20 @@ fn main() -> Result<(), LauncherError> { }, }; let encoded_id = urlencoding::encode(&args.model_id); + let ip = args.hostname.to_string(); + let hostname = match ip.parse::() { + Ok(ip) => ip.ip().to_string(), + Err(_) => { + tracing::warn!("invalid hostname passed! will use system's hostname..."); + // try to resolve hostname.into_string + whoami::hostname() + } + }; + println!("final hostname: {}", hostname); let model_record = ModelRecord { name: args.model_id.clone(), // build address string with hostnmae and port - address: format!("{}:{}", args.hostname, args.port), + address: format!("{}:{}", hostname, args.port), owner: whoami::username(), is_quantized: args.quantize.is_some() }; diff --git a/router/src/validation.rs b/router/src/validation.rs index be835bf0a07..b784dcfde41 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -311,7 +311,7 @@ fn prepare_input( // truncate encoding and decode new inputs encoding.truncate(truncate, 0, TruncationDirection::Left); let inputs = tokenizer - .decode(Vec::from(encoding.get_ids()), false) + .decode(&Vec::from(encoding.get_ids()), false) .map_err(|err| ValidationError::Tokenizer(err.to_string()))?; (inputs, encoding.len()) } diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index f9b217c2dc1..4740942a646 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -85,14 +85,6 @@ rm -rf /tmp/openssl export LD_LIBRARY_PATH=${DIR}/.openssl/lib:$LD_LIBRARY_PATH export PATH=${DIR}/.openssl/bin:$PATH -# install base package -cd ${DIR} -OPENSSL_DIR=${DIR}/.openssl \ -OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ -OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ -BUILD_EXTENSIONS=$BUILD_EXTENSIONS \ - make install - # install ninja for faster compilation of CUDA kernels and setup workdir pip install ninja cd ${DIR}/server @@ -111,6 +103,15 @@ if [ "$TEST_EXTRA" = true ] ; then fi rm -rf workdir/* +# install base package +cd ${DIR} +OPENSSL_DIR=${DIR}/.openssl \ +OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ +OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ +BUILD_EXTENSIONS=$BUILD_EXTENSIONS \ + make install + + # install flash attention cd ${DIR}/server cp Makefile-flash-att workdir/Makefile From 209d6ee7354adfc438d730edf50e985c91b228c6 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 01:15:01 -0400 Subject: [PATCH 16/40] update README python example --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8271abaa9cb..822f41de75b 100644 --- a/README.md +++ b/README.md @@ -53,11 +53,12 @@ You 
can then query the API to list the models available in your cluster, and use ```python from text_generation import Client -# get current models +# get current models and pick the first one models = Client.list_from_central() -model_addr = models[0]["address"] +model_name, model_addr = models[0]["name"], models[0]["address"] +print(f"Using model {model_name} at {model_addr}") -client = Client(model_addr) +client = Client("http://" + model_addr) print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) ``` From 022d7be9e298d16a8bc3001ccbca8a7f487b59b3 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 01:51:34 -0400 Subject: [PATCH 17/40] Further update README with details on chat-ui --- README.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 822f41de75b..cc0b702df69 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ If you are new to using this library, and as it has being used in your cluster, To start, the `TGI_CENTRAL_ADDRESS` needs to be set, so that the client can know which servers to connect to. For example, in the LTI cluster, run ```shell -echo "export TGI_CENTRAL_ADDRESS=tir-0-32:8765" >> ~/.bashrc +echo "export TGI_CENTRAL_ADDRESS=tir-0-32:8765" >> ~/.bashrc # if using a single machine, use `0.0.0.0:8765` instead source ~/.bashrc ``` @@ -92,7 +92,20 @@ Remember to set the `TGI_CENTRAL_ADDRESS` environment variable (ideally for all It is also possible to use the [chat-ui](./clients/chat-ui) to interact with the models. This is a simple fork of [HuggingFace's Chat UI](https://github.com/huggingface/chat-ui) that communicates with the central controller to get the list of models available in the cluster, and then connects to the corresponding servers to generate text. -Check the [README](./clients/chat-ui/README.md) for more details. +For example, it TIR, you can access running Chat-UI server with *port forwarding* by running + +```shell +ssh tir -L 8888:tir-0-32:4173 +``` + +and going to `localhost:8888` in your browser. + +
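
The Chat-UI performs the same discovery step itself: it asks the central for the list of live servers and then streams tokens from whichever address the user picks. A rough Python equivalent of that flow, assuming `TGI_CENTRAL_ADDRESS` is set and at least one server is registered (the prompt and token limit are arbitrary):

```python
# Illustrative sketch of the flow the Chat-UI follows; the Chat-UI itself is a
# separate web app, this only mirrors its discovery + streaming behaviour.
from text_generation import Client

models = Client.list_from_central()      # asks the central for live servers
assert models, "no model servers registered with the central"

model = models[0]
client = Client("http://" + model["address"])

text = ""
for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20):
    if not response.token.special:
        text += response.token.text
print(f"{model['name']} says: {text}")
```
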

+<img src="assets/chatui01.png">
+
+<img src="assets/chatui02.png">
+
+Check the [README](./chat-ui/README.md) for more details.
 **Content below is from the original README.**
 ---

From f24c87b83148b324dfe57553d84188c9ae38a785 Mon Sep 17 00:00:00 2001
From: Patrick Fernandes
Date: Thu, 17 Aug 2023 01:52:10 -0400
Subject: [PATCH 18/40] add missing images

---
 assets/chatui01.png | Bin 0 -> 40762 bytes
 assets/chatui02.png | Bin 0 -> 72209 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100755 assets/chatui01.png
 create mode 100755 assets/chatui02.png

diff --git a/assets/chatui01.png b/assets/chatui01.png
new file mode 100755
index 0000000000000000000000000000000000000000..6b24e7171b6da0d14259bc496904e9db70c9084b
GIT binary patch
literal 40762
[base85-encoded image data for assets/chatui01.png omitted]

literal 0
HcmV?d00001

diff --git a/assets/chatui02.png b/assets/chatui02.png
new file mode 100755
index 0000000000000000000000000000000000000000..775f8c52b94f32267be0ba76595d72971b78d4a3
GIT binary patch
literal 72209
[base85-encoded image data for assets/chatui02.png omitted]
zp%`U@kHb{_h_$W31m^?r`P9dJDpcxod;U6Vmbd9vQX%u{uF z(}|bX+?)k?AeAR-5D$+Q0<}@?FudC&v648JF;oB0zeCWH2>ZV z6Dq}9b)(d|I{>kvMUUS>Xprhu)0Wk!!08Sk67q4zATt~$GWu+R zD@&NV!eHAx*-iJ;=AS&Yc&P*VmBWT8QqMOBZ9TWUVz(rMw!UL5rq73#$z&_RbTajy zP343IC5`_2^Tvf|WNL;rn}ueK1o;liDt)p1rhSe3LrQ~du|2#0#w~JY@MwY;H({y$ zH*ii z{~bO((x&q-ZL((rZ5&A0Y_$r^MF)RXAG&T*Jb?x#ZF&SCZb(x2G6n|-eC%>63VDn+ zd98LhK?CwKvb8T7pS1~AqVtz}dk{Qc57P50Lc@M+6Nb&2o&TkLZOmK6`y$7Rj8U%r1(%;EVl z^?PbblF-nWp?$#0@^OU0i152?n1+&`(Ma=KP%A!KdxWRVHO%!z%~t!2{RVfk&R;yz z*d)9fLs(!GAh&1(&sK)e$ym%n@_wGLOg@ChMb0*#Y3_QJ4$C?D75uJtqZ4D?J^{^e5Z&1AlrLW8r zMYF}w--BwtPipc*>i za_tQC@5Xwa!~1NsMx?AEAG>AHdAek}JfOAki~0=XB&GgL2WmKX2l@qYU0zWpuIp1l z7KspPFeM$U3DNn!B)Fie(#FATcp=s;al1zoA(scf?{yr{_bwAP`5}i01wBJvJr&Xk z@k)I;#LM+)KDK%UTrkzu{K6;lb2t0$b8l@mtK!kYYDVhEg}bmA*SuTP;9!(`%0CF} zds(~xh8uw$s7@Bb-TQ8i%%#~bvShNx+cN@g06R^Zdyzi%W;7rNz&lpIP0tRrfN6N) zP(d}7=$BXQ(zl=2^%bBc+MkmMLglL=4Nr9VC%vTjK^ni@mV~o%czk*(m_6;4(1hrB z_Rc}bib&c}S+3u4ih4S#YoKCLe5TeP@+iQTXmz@wFi^Vt5PJx|!`*c8(!%byW?A{-VhkLr$*O4dwA`2Z+K z1UvJj%Uk?v4VpDGwVJD{EMvk)?acWs!Z_@Ql!O6J)8m*#0701B~ypOgq}S4Od+> z%=R%;*{Dtyse7Yf$)MjRZdAC>x=O6>=_uu^zvaev%?#5AzcfE&Bb?RK*;3;6`nOK_ zJ;~Clruar1m1vEm6b!|UDO95NL<=g@f1L-rF{&~%d)FG6{IA&HU?MzzSy@-^F5p2m zhKp}bB8XmSP`JJuWj?WFj8M!b6Yka? zM!r$1CZwyL*tCdWD6EClsR5@D$d*`%6uHOO;cg8tQuww*F2=9KW#W@*6?;1eiMPz7 z%&B14DpKIw1xCX)_9}i_z`8N-_y#Z1F znp8npn8TLj@4m=Fnb4Y2g5tZ{2{ANuHnt@8JH|gCmm0y0#36XPw%k->_Tn&Z6;!oX zrFJe}8uqTrDI#W$%e6-q`$ ze>I0gOKkYo;XeyguabV*%BaQWyswMqr`BKP3UhA|``kt#n`au=Q=RqmrJSmy>iaEb zN?Lt$$@dzg;=Qjc(DXMI2={yg?g1x~hK$&an5X0x-MXkQ0c43}&Zu(EDBa%#OEa|Q zsvAW1xU+E*Zq9Wu8X81UjdzIbWB(xVim!?!=-(J*TAVR>giOHM*T|QmH=paXvMeA& ze}2_%)^To*i7j|Bpt+l4+dwnM^Dn1yUEbyU%zBkaEi;eRNsgpMA`oYmiT@Tx6kCTJ zqFj&j_LyIJ@4K;fbO50yHwZyueJJ~lh{A+Jl-`@31RvXnFq8CqBZ$zAWYHDC(;!Vo zCjtEp_CW_95h-4YJ#iG&$MqGFndg+8{<) zgHVFLn56vsZ>mx@BDJ4#yGO2&@LWYf8huK%)F7tr-P`iy%ADD{& z8J9#N<0t<})01dwV)jdV5OEDOb}5-s8OWX_p2_r0YVpNOCE|+80a-dB&0i!rw=Y;2)Mf<-vA*fMW}J|XUBoS7dROLCL>m^hWBjR`|vNOy2cT}R1fw% zO#eAUk|lY34FsmxXY6tDEMt$1Xn@mLw>)69(;@LtA=fc4AP|DG<*vR2!%P5fV**&) zDRnUbXbl#}oeIjO4zy=hyhoa=?IfB0g;aY)snu)c!z%b(Pv`|w^H;oBLcIIsJ>?|C zJ*k{!R9(k%h*;~q?(lfs=2Z~;%vI-l3<(Df5F#}FYjJ+)uzZ(`i8o)R?~4*`C-NF5 z8dcKl%6u~l?&!_0U}Zk|)Ybo}?yaM${{N@lvRG=fR!qB!*WLI8b%lbcd86>ysiiH=_NvIv{i5g?<#@uf<_jn8%7`&3 z=YnOKqrG*U)`F>A!H|j;u7zfr&-zS|GQyqHm7N>2hywh8DZAW~&cN93I$wvAM5Of| zk6Av;zbqa8QD+vcytseq<_4mlNTv4T!MebV>p!ULbAlMlw?(6lUJ=A8;~LunU#qo6 z(!WMlc^C1mJ;^R3LVEvd&OfdP#3t+ScFESO-@cd$xc(Y5bHwCZvDP3`s#D@uKS&1p zjwLWM|54Qtv;k7sb)^_eC3n$|rHfSiRl3aNx$ z{9823XMD=SG{+mg?}{7qjCNmY>%L`e9lM5`*~`z6_WyIyw%q9Ai>RWG^;p;BHtc7| zc24pRSlR5T2469fz6(f^wo?49Tun2+acX-i7On??>03GY;R_|WS&rU1V@R(}*~D|S zShclD8P#Wj)ur-QV&IyiS?Av0aS( z3@DOl#P&PB;0n}?>=?G?6JC9ieZF>nd}M?v&IDXQLMOlMJg#Pp?R+LMS1)0n}Xo?zhqHfpO~^3Ir&saGO1@SR<}eRugsaH ze#T_y+7UPwo0rj5OO971Ij3MHGQdZl&uWp9oB6q-);nQZ)FN$WJ%o36`yoS1Q3(uTgim0inEZoEhmy zb^YCbk(>RVT6baktd<_PPpaKHlsJCRlxa#UErS?$C&=BCzXBS ztzSH4Mo?Zu&(n;bG#r6#Q&x=t3%~LIw+by#2GNMwAwlz?V+py7RFofA`*P&EGAFm_ zKU{y?Kh^pqIY9k|e;`GmxO-TaB2TR8cSTEWXgX~9+mAfL7=zJ)!(v1(r*y6NV=d>} z@~r@)gu0gKEOV0dOukCY?K|%q!Ja)Tfyl6_{K_^p~w zF@@Dp=C6GzzfG5H=bnl#{QTKmY?kT!Wn^^zP>FE42y%*?h99p}f&`k5A4}0sPd)l6 zEaWkUWw7I&r<9oyw)sjpev0JlP?gnF?fl^eu?u_kgL@6T@l%#}@zRVU>7fg!=*hD7;+KkNoUfxOKejsy| zh%cn?TJ6>|VPjM2uxbV%<6?j(PSN;KWnH<)wO2%Ngzfv3a534E1lK5`OUA0%^xqS# z@jfjm$<6u}H%5MMjNPJ7zm|}xj?adqDJQ3?Gf~4eek@Gr@KkKRS>4L-%++!xVd?|L zWs`Nm>FsXEo0)Hk_d~x=TV7oB>D(CztJfP-`90(2yBrzrM0yaz$fz)gx4*;SALWr- z+M*|Dvp^5jL{B^3NkwGiQ(fP>C+(jtm{PYEwuZf#lQA&u+^3zga4x8bSqWwRAZZK+!oAP(25`(h@8zIO%c_pRAl51+Pcg!{x7PWu 
zjETh9YW*AoC7RP8RuX;|o^1?Fe*WNFE4;l`gyK)Nk~aBl$x|#kw7Uc1Hq(vcS}@+x zwIJBo(-+nWi7`X8uj#hgS;~gsWl8AHnFn!WeKUT^MKO67n#CvOTFEo7!oO!em9%b> zqP+V-^}->MHdtQXSH3z+d%v?1KDOyvB+(l!^Ys2(Q20)0{`NC0q%gaLFQz`J2;N3T zS+^SK;2M0N_o;=I)p@Ww;?MeFz+#7;ueqw0G=JCG50f5|RbiLwMyRf$UF01)w2U3| z6#i`1l!w3Yro%OzBj(kvk@|}Mf)3WvB13jOSAK93#80gBZ&=haZ z2V~}7L(*nF?|BCpbnrL-2(McOk4w#{3>y2M=E6I}cdmYJ*}Bq4Cp&pEu)@-K^~SO` zQI!VmhNm-C>&|@_qt6P+bPpogtIL(;ye!`pGcv!~U`*1$T1mfOr@oHuIiOgCyo&Q_T2TXLElc1cPPpi?UMCmomleJaSD4MXy6erUz@=RMVf z?laC6SN)MeUGDdGST1I=W94SS9Gz2~e|{D}fBDugqbQcSMRl9AW`EhW{-UtJ0A1>f zb-Mf*v-)9E_^d1}x$$`k9JPD<QC&W688NMoI`BNntL(4X6v6J&}s?5;^`>Sl4g!jT-abajqau_Da99R_dBK$Y|?a zMS3SYD(+A-?;Hy(0R8c*Pcb9w@74vw@%G=DQg{82H1N)Dj8II`(uCF}719D&f)Yv>6o#Y2Vi9fFL5trr_dD#9(!>5x< z-4}Ar1UyA6qVYW>Gb}c0@;62}5Z!dPCDi_*A>5ZMlxn~ihm;#X-u?wMolnzyH=+TF zeKRr`Kgao1r12GG*f9B3e`6cfjUJ;xwGtbxBy)UqRw`eUoFM)5N=ghMmow^Y>0H6T z`rqgM+NMe^h+7do#Z9v_5H0W6%oQGh-P+g~GHatLQ_YrT{ozGL;6s6QaJ~Hfn{`*Q zmZ`Q18O`9`x37Y7S6rpjvgh*JUS~#Itn|ybk*y2jg^0>2vbjF0^NV|&V;ZmfBg{YV z2g3sBdWWm)%oy2qX>w`|DV)l;jr|as7Of}E&dsKs_*g|-&ih%Az7|#)32#Hnwke*= z6HtOX;hBF>SN!<3^{a{4&U(EU!SB`I+p)`b?Ydt2(7x!Y>rEq}M2iDz!!sdjahTM) zk4HBtxis~hTF#xBI-hFxyZP8_{W4yc-rIzD{jgqW{^be!@XB-_KfZN9^Awc-64-a8 z)=IMY*2tlWb7dFT-~=o>stepl9{$GJDgU8=Tm4&N`$XMY?MYeFckh7r@1Lth5dKJp zoT!axF`(vpp+yQo4&pc+S9qLq!74X$i&|6PJH15fI)*EkR_ijm7#Z6-3F-0`f)@?9 zJKoT1t)!64Y2ayja9G?)#-S*4W_J(i{6x=_TQHUvDuo}x#LJ-hBBZm6=f!geS$V`y za~O`*+J@LcBE#n3QN~L1$r_Q#UJ?(0g4GV?wT^F2Pv$BgId&R3HG_7JJ6Kfxr)W(i z;$MVS?!9t9jw^b?l-o0&h(qxTYB_Ru#fqBo+w&I-Z=b&sRggDHh>aO)es1QLn?ppH z9M9M$wj$bNXt=&ZvbHgM7@Lis${NeOx7G5COu{2>AYDS;Jnp2ft?aTt+;QZ%yT`Uk z`7Gn%oM?wl6K$?-)3U2>L6BaE{WX`In=M-4tXtipi`@u(?p!Y_S)jv7MzO&oN@`fG z?m@ALij3T0RM;jMTz>jbF^N5U^wC<%alDTjR-*DJWAoHHrTm+r{%W^zvHP!!lvuY} zbFc2H`Dav<$cq;aE1NLpr;7_>^8C)|-^&(OXJqvZGYJ(QPLb67;fSzFL4 ziw-!yQL=4;6L^GjDf8byy(t=(A7~b-T8lY^_e_$@i9=3X6D>SDbz!@iJ#~6bOvBFU zuqWN+KAySkAGeHb2;2uPSo*sqVF8Rh?M|W_U)|+z2mJE;xqM)rrD)zR8Ng>{jXb8g zWFYGN)odN9X7dHGFk|>1sD(svQ+a=M!6qV=axJ0sgf5F#J*u*S4 z5C&Z95jqT|F3^^ic8m-8%)Jviq%yan4@gc|tKa{cxz2{z&2B@oKkc@Dc{#wD%Dudc>2c32fxE-GL#eKJ zFBy}iuH-#;w!g~QuiZ=(Gn-d~-HgLbeLpf4$uO@+%{IYQDaL2>HG&3=w07N0_-)a9E>!N)4^hEHP=*EN6p z&zd>Gx2t0X{-iXY4qXK|eGDIi2`zjNtwsC#>?V@)`h}S^Y*Dv;A{~8Vi^vIA}5<9Oc`g|d=sMp(`gFbj`9FZx2}cI!9mbWuH*p)L~o9E)xo><4)Bl4bGp z7*W#OkX8=LFUiG-tC@gDb}EE2jUzo|#4&z#n?;5Niy5=^4pr9sAEGt_@ek=h`twX( zJ=@vU8Hfm%E{t9uo#}daBr}f~lan9Itb~FjP1wTv19X*=4*4>TQ&Llscpu1M@T7|l zJs<>Kvpp1@oMQO}sugrtFdxbEEZyl<)A4MIaPq$k2tCw(*K^_FN+WH*mi(inF}muQ zg&+5eb#x;ONNt70EH1Bn)oqv0PI7;}C~Z-=4_r zI1j?x1J1ssX!!f*R-w3du3GhCL8Vk_Bh3D|?Cqt(UZ;AyN5nPPerl_txjz0*kvV3x z4&*!~(v!kA7IrK9qB3&xek#2JLg21+QitfF=IzgSXuyr-8NTv-zH3h=GJq$Gw?A?~ zm|r_&mQRrzn_0==59N~!Bd4Hn862tDV755?;I^Qg$c}f|t>b8th1{`0$(}*U^ZkWv zxOIGV2ZiZJryyLby|}=@9r!*jo&OsgV)rIDdu6u|C;X1ji>qsE?f(d|KJVC@uDVPt z2^w2PWTZ;@Lh6vQoZoC9X_+H?u{yXo(K z!v)a{CKtd4F26JQkKvF;a&sDawF{$DtP*!|$vi=J!3-PJsomkI2Si7h9tC*mce(YA z=Grd;9Kxa;fp-@`MZ?>O(ViC*8hQfEkDRp5`NmO@h(!$_*okF(oM2ufaC@{wT|5g# z47p&k6v>cy*VXhMB^WI{1Is>&FmuvLZpZ^d;27VaOEOLJT9exKf-~Wf4s(|oc`+~| zT|3Xk6emREQz!(R&JQoligYo=M*x?M99&-l!$3Fq(1;#PVyrHquz%(pS3n1eCh$Op zcULL8L}c!OM$D0It+H)G-bDY04@ue_P=$eG{&F0S;EU}qYVAjD%GT@ zEOknC(!iysyVd#)0n3|RdsHr|4!MCTnDi%a(_BxE9pMdjfDsD#F?KhRx)PBBVn z3f+3mYq6q?oQL7wH<9T){bS-KT9Bh{oHdrk>paEjmX9MHcZ2W$wRKLcUHJ+*QbF5% z#Z;#+X$}+a9&ETcg?9DZf*i)_*b}2j`E*E44-Qw!9~j@UN6Mm}d$>4!ZB%sDeyB;3 zmaDnG>m^0&%^;1ySxW0%!R?~Mr1weLxsPHb&gD<^%;-%`Gjq8ay%qaY8@E1)=V2js z=d)1i_=awXHvASXGtOmq;`(vu^^8z!>Fu2BriA0+Ee`Y@;&z|r3X37_ycm9IT`G0* zDBo(J)@IhZMYT|{jb?)A*Cf7GIDFN~AeJmBvt)7!QFfuXQ9D*O%#j!hzivui^}cO| 
zsDGIMGw>|0-`D3D(F+nPwh9TBGhIX6xg zyS{%p6|}$*G?>gZ%%Stmx5S*1SLwjp@UsGE!d3QH>#2&@2aTJk$qq;S$*weM$;`d62Q8SW6z0fM_%X^9Z^Gw~8A}X%)rj>gwlhg-Z8x{zK1j*g( z=NgDr^afw}vqyHzbkpvW*POpyq=5Ki96`&u-pP)-!MDx8yID<3Gu|tF20)uPu%ANz z{7GU|%VUF^VY}W%-Ab~HeHui`@O7LfW&Wf^m-HxdE%1)hWwNa2%h~?t%m*<)(9CJL zq%Oc)&X+txb?=@wf8}JGS6Ef`%;%)apnqKo5uWBTa(s1qHCp34-bt_<_2l)S0N0%u zQ}LjRD5_u%I)4=4cgsHRY@3557wMb+ocH}QxD0EC4VrJ!{Mogw*Mh)vReCWOPL(ew zCb`C>uId)OGlxoUI&Ob>dGxBe53(81WnQiYt<4{bD?l!ian{m=TBKzoNa@Oj4?ZY8 zKd?Wpw?RAFt^O`16+Q{U?dKQ1sH!wvx%p^!HXw z<)tr4v%xxBCL%{NUraB?Kbh@lM_nZ@6Mk3+eOIqc;Pn$kC18qH+R=dLg0HrHv)8bW8ZECAw^vz{sE0a{d8Wfsh z7g=j#lfv!`McdqVnW;1lWvT%ma_!#i2G1J}>Pjj8bAM zDOqq}v_yKsVTVNR8OB0QC9oKWkPg2>nypDw3GW$jl@zPW|IWsS1t6Ol0w4STi ziuFH|c$H*eFaS6UHl(?Ob)9Uqy$#GKR{2I%6q!at3FE&C& z(*5hE@6Df}rp)7hlgp6!#>ZlFBVkN)F?+oNi_}T!&)Tmt+*;m}_#!sU7ZYQZg=NhJ z4n2?P{_(CZzb=kfO{OZ!XqN4$B*ibs$R%_#V&!NR-xa^IK|=T42AVh`gIn3l zZ7!eG2Gg3O<(Dp~7e8LoBruv7Zpssx=3S#jzQvJa(HVk1$?j*Xnm zc1l#D)Ro1%n1tq~x{p`8EY-f|U`&c(4113CJb;+Ii%BVynz6{I;@w1@?%*0Z=oL8R zjCIR+TrLWGUPI#!#U2OCF1%P~-p9iBh}$(W1kAKyHBf4+dp>&3YyG=|^B_?Sd&%cE z+;NF>iN~ROEB-L<O1t7K5wONeofBp2>k6l68JCEvH^lp+We%^p85U|~M4Z(M z^%^;u_fBnT5ED-LdW`pq>O5QG~rBz?iU=8HTL*^yxjwi(}1kZWSESpV#>PqvTw@2Y+c#zmHg z#Qw&DD>J)xF?w#(BJw^_)$G1Lxny!^zbZ95T9h zI{rFAa(8`BgNk8u5ch3@H9s0e8t&n;VxQ0T9&6LSsFu&oDBD^n;&SmMbMYp^Yjx!C zdS+*8VuG2(;N*7T^y6(RT*>FQNvfB^0Fh!6`1q4n4uxHV-jMfUH`KiarbjcT#XUD9 zwtZpj^~MxDzHtTdz08Q~8%;>zBzf@Z3&gF2d%xI1{$`|>?2R%VnHXZOZX1`F&L8AV zXc>CUDlyOSQ}P!3q2r-khggXOKs&3OYWEb zS@2YXJ&?e(YF>bHP~pi#UZzYT>afrUJ&MBo_9^oZJ2Pqvqd`^ zrIidFhztVOmne1ZFzwKNELp@W(Xg@p)ZoItEv3c^w)tt1iTz3UW#U!eU74QWIYwuD z)9X^R3QyFwjsLuOJOfeFh1kMxE*EyZC+wyevtQz+)lk>ZpR`5#Y5?XBgTZyX??d=I zQAvv+l@vW_u1bF)3)N-|a0*WiBAxim296S5Z-+nqBZMvC&G(?7_AigxMDEktiJj15 z+2WOVXY+IbQC}HGl zKqgQy&0-;E1e4+SrLagLo{nBm?jjYx_Z(MbVuhe!LO^CI9;) zdcvcf$$6wh{NO7(|A(LCiRi$}M-DaVf9(T-_siKT{lh+<>ALV;tM%>N{^mOP$F1o6 z9XKU6T@E!!_^$A7j^W2#<;rWDtxfv+ACH*~#40{I42mkHxRMIV>I3N*UYGM;NZ@_2 zRs-q=+a$IPyV8j!>dv*jWCM!dImG#VHVj&38oazQYwpw zp6Yq7&ZoSia{jrX4LOd$T&wRe?@K@DiUBD}rP;oyyTv>P*}@2YKBbV*Hbr;S{y|Vq zON)d$$&m5f)SpW0UzQ?Mu%@-?N0o;eO}jkrWoXn51hp~@U|%H9e{f2RZXA)F#E32a zmcr@=Q+MMAr`l(#!c4?|v5?pJ3L?rDu*qu%#oEsY>oiJ1tCTtCZZ9`|V-vsAZ+L_i z#(D@MS4u7lA)5r9Lv32NxgTPpq9*>HkW(|Nts!%g7BgRiS~X^s8F^sbYO!^$b7kPRuyGUhmD$AXFYoNuFLgdQ_$6u&uf+&(A%{w_aG$b?>Ko&(9bER5@n1p z_B=LI!ZnjUKZ9FEbUu*rL%mq{sEUN_S#o0&yNC4;@tQS}#n}4IhH0i3eCx1$s^VdE z+HayJGZ5b0ti&d#IeHvwpI5gwHv2%f_=CuPXUCd=r8Dt`bW?z}9Tly4khhqUgBK z8i`+(X6;mJUPgQvzsoGu|F8-nscv#u*i^VD>i(s<+BVzTv3EG>HG|&BNEV;SyUFUC zOWzOlPCtFr=iMVGrrE@~7Etr?&A*sdXP#_*@4bCkvfE zO?OEFol~@J54YsZ>Y^%ux$29~`gh{F$Td!s=y4%F8kO$t5Q>SH&(GILBfyMqBmhNA zt|ek!R_^Svy_nF6i3G;BHX5JK!`AbMO0QI9Pxy-l;zVw?7%CiLp}p@6g_R8%>wBI|M*5Kmq^qtqQINLtAg{wx)WTYmYzMj|RpqRHCyo66d<>w;Z5M85Uj zH_CUh{_y)%dza&tqY5kiQ->6JgyB9Nm*dn?$YV&;1 z6-J#K`p!Y`I37zF@iJ3)#|oRy-x(BuatwrlnVO#KvFz9s+wbD;ChB|cyiP`6k@Kjq z#nkSPeSPpdIgOrHIu(M@@3rCW8TW1Jm~^EnDQ=u*ayyrT)|cstdYwUI_>_p;W~4WU z#8oFX^nCkU90&2-EZg~;|Kfj`drd*qn_aUsQ7qb>c2|!PcQmDf0hIPk&T`e)~lx7=dDL>e_Zt8ZcF}e z)Whz5s(F1eMrAkn0L00F|Azv||7;K<2l^EA+cqOgqGH8O&M#&)L;5ENGo==FT86Li zQca)*4*q8y$dl_f5WeVY(B?Jn z!nGrdAF}h@q<;KgVF!-@WYM+Rz1VsR^z5*n&ogCK^pApLtlsZp2vZ=<Ginz{n1TPBFY+_X zd-^CSWH9;X(r<_g$w=)hd_d(IgPycCNjUlyNW}zD)n^rxK+%*c%((gO%RW$^eS=5* z`Xqr8)Gt3BhlO^?vw|xZO?M%QB|S^^vagVb1o#jTNin^|A~V@?>p!+qlS7tfpVUWy zW8N)a$ew^Rzirb(QZNEmDSH3bVc`PzK&e+&7!oZGrq0_VgKTgkY?O;~)S#Y+tfr%b z#{gNweKlmcf%9Vhu&ng)3-_hA0ABB*W5;oUNlz6`8C>TYuT9Lvx8fSqojwQsB>fIBGk!6t>*~ z9#gqkb%-y{n}UY*-fq`==gj-V-izqTT72RxopHsOm#6G56hBh_qH9<{Zcp?7Q>gHL 
z-Z3yXJAwauL0~L0OJG+2m(XuaAV%WjaGFDjF8tN*9qVy0ANsmFX>F%Z9yv~kEv7c? z2WVxOXX){-(=4^%69a@-(9KW`|=xE8i z25q7|9!H=`CTR^J7HL>G+6?U=l9*RJ^mi4ErcpbHwk zv(SMrJFN}=rD8ZGe^{>Y4WsMIemQOi`(p;{zVm!I#P=5vr+bEnj4~qc59oLu@~ONE z&uFE8;u1-I*03~V{xqv5rjkY*MD1Nvb||v*sC+zNiD7H?n^rt4}k}EwS4dc@qlqz$B z+pb*ECZN3_Bj+F+Nxlp~;jXYfk}JWbSSr(kHzex(Wt8K!G>Z0l!BNuqw!_P#j-JZv z?1TU%L<95gQMebTnx>%!E4|@xw_KSfD6(+4jZ8f51!L2YFr03x@z)6$Oq{4e3^Stlt52VuG1N z83{X~tdeyJgxUEcL8iB#RT6QnY!f1el$DK%WH7`@ySX!rbUQ9&<#2r&w8m38F$L+&oL?4el;$B|0P7uMQ-4i>2%lbv*$-Sfa=_~&fYuk<+KcFTF z8NQHV>KDLjJ;6;1V4|s6S%rpC{&^A}9zODC`WqqyC6o;;y*Y zq&mB5xNV2jmOo4^|BGT{U??PV#;4jg${#drQfpNMp&6~p=Ne_YtQ3OQ1y@&~B87p6 zhsSF<_Bff}B1SP?^lhF}DznqZK(ukBghrlX@=7)a#@~{{=S_`>|0GQI7a<Vobz_9gZS*l$al0tO#7KR)@gIz+1FtK&+>XOFW^rogy{pr_FN?M9#C>3R(JpL`W0%e~3&NL`XkX9L0b{$$cX5X28yme z|2HGT3)2A^Q7tBL3w#v?CuvSY#~|$@duMpFvSP1>4ZNaR^QOe%GC4?dKB2FYbn(a_ z)2Fe!UWeF!bE7Ro%GGn>Xvd?)f;ZANRZsqvKf=0OU$+VEt>CN{wNQg>hhS+ja0y{} z){Q7#ZGBy6hvcDb>>40Mkqp)>ROI}YIkC?xyjP_3@L$8cyPM2>bEBMDeo zua3V-Tggzp!|E8dh-F-w^GL5H5%2FM;g|2hkWxDhOPqfUjfw+;WIdp%D=V1qy-iTi zSx0eNMC^5$gF&!)7svPk|J+E02sK_u1!S^wy>y7-7+GQsxnypUk1iuHhqT}fYEAel>hZU z4$iooj5OjkGVe&W`a}=uqBl~|Q~}Al-dlo!Nl=gdD0en(ZeU=n>wGL^vahhaemKk- z%Qy5f`#k0wezypcNB|}E3c{x2sYe&O&GH-b0WFHxW~=}s4~N9^@)H{V82U;sDbIU< zuL}m6c%W0KB51e3NJT|e*#Xq}TW@EZ)~pP!QzBn&+lqe#^AhSgaP<15S4<^=NJae5 z!aZ*{>coiIg~t0*&K)@qAnKC&~>rt$I|0Rw)6EXe{zbN2Awpb;~#+Iw67B zeK~++O~3UMF}dl$is9#vg|p^zCh*M>g|&>P!-O==Mvr2e@aqWilgBVj z@QUkKLJayk2xiISlljb<>$W0YN8q-#P*v-g}jV<_iS*`%rK?f2fKN_r(vpH87oRWk>cUaHP7NeQ-W-D{VUF0sbd_ z$SXj{`8fZU+Wlgg@d3kK`!og~kmE(9x-xsl?AF}~K} zz7ZaMtod7)QkQG$C#RVR-CO^=|BSI>s^FlGNdXZg6?@KOm!q%%2>Dayf%h9~+ywar9GLehhJdi?k z^Y8qR8|E{QK;E#I5RsX?ZS$WA-u26VgR)VsMr#`z7i{IT#iHuEx_;LDj3)#G2DEG$ z2CN3~K24Yg4Ph$H?Urv7Bj0@ZFcGo;s=Bk>&m%pxN&QoE0|mKz|DG60#*u?V#8Kbm zZ{`)>vH1%;%e?^Z>&Y;Or?QQgC5Lhv&;eyyyghK!t*p|Qji?T&s}DcxRq>I5pJPh> zc2sm*Rd_(`?}vHNYS8b$w)}7B!Drg{Fj>ehgDd*mJxLqE{hzm{16$F&RvWlV`ma!> zb{^E7>#H|0vid@>a0eOl@I~^#;eHlliM?S$-zWS z#KHQ+tSyX5M(scR*qdJU@j9Ds?SHFz(v&Qx`Oi`n*^~X|gr9c_ zY)F!#52E%c@Iku}nABBwbq&MS`wvD@VF)%6bI;{w=AnZBm{2RCgJ%6QzIeSV^EQoB z*Pz1iv)6X00kp;l(hSb&4?Zbig;>#oGnl_2m{DG z-*NjGQ8MIEf|)azB;i?mv&JIzPrz#zmX#Ap;>WY!u`$~$sTzt2$hCkLEW zO^LST>nr3ES%l{5chFzXATDV+x9x*Ui6nj)pC2s!E=E>AXH(c=92T7mP8Um}HAJy5M!T$w-bouglIGWcy{{5?)3~oV`ot zH4TX*C*`&%vICEb{g(h%^99M7L*ga>Dtho^Es3Fm#6s6zw_N4FgOq?hKjYr!3yUD_ zy>sD~j{s~v%p;OmP;=v0UW{M(EI&&7nG2kgjM84b+yIOgZ>{z83J16d2(*h}wDY7f z;N^gC7xfvl`|v6V9KQ;>3myYZF6#5$e|>`Ty~o@3W>g3Hqe%Kyp-C|qmtu z$Y0zqSg#+{`o7>NKZ=sw6S_>&dVwz0_zGO*ZrCJgT^(lZH^7n}rA6Z^60n-+0X|L- zkZA9#4I4Ih)eq(rz`-Ce?CdArf(-K89B9rC0i~iG>Ner<J{TqCv*2B#^fd%ghaQl|@bm_||L6y-SgK&w>K@%>*)B!jFQaw$8bi zt=v$W#6%%?pge5^F7oPgdkKWHB=7Q6E zJ8z;-&R=}@_X)j6m*d6?Yb<80$!oeL4(1v$;JBiNVNUYH?$l^|j;8z6lIY}=YeP{R zofdc`Y+ho~@kl7ih13?mka-g-d1pSj^m*wO*?@hX#aGp05(kbe)|XqY`TN*(&k`-W zBkA`;pa#`6yxbtg!5P~@|C8=x36m)`RgA88^gkPiN@J1DjlE9;%gu%N|D0t{XBKhX&3q1Kp(uDRJC#| zxza6L-xu#g&ouiXatJVHL)+mbs=kn)4`_0PzFwDWVA1`047=JcgknhSgEk|7B%3_< z=%KQiIlMPeT`sj^6+f4{Ol{x3(g}y$f;_;)S}SD!R^USaX1D&Sz%Wmq#w{+o@F+jZ zRw;>^U8l>rF4{L6m02Pgxtdiux0y3d-K?hL96@f2oflvL=XJQE7CuTd)TW%`?Hhwh zpQIf@G;d?TUz_4o^qk{*ztoAc1ekfwSO?0d3+)!#X9mcJ#b|+D9m*TLSZx}$E6pI- z_6E;O>#_R_&%>A&D@#db#wPN&-2*V)NOJY_*K;PI)r;S48b{sShO=2Vl`JtyzMP}h z=&sGpLp=B(DVuNmt$=fx%}z1N&WOp%`%vnu$RoF|zj<=g9tJR+c08d$wh!7t&_ z^xAm|WxH<8?+M;`OyIhx9;X4pQRNl%!j9Xwen(e)c9^8lLMfD0y;hz zh#>`z!o0bol8uf&Dwr|M3ie4j%sFK56kRim7en4s-=Pbw@Axt z@cO@xSq&9#5#NX!x+`GiLv5$8O6Ee=!2al~B z7Ti6uhr#xXJZTvLu0Tuuu@+c6CliPVufhqUwAGbb$J12o#7WdBK-fiVfqmI3Ct;nz 
z*j8FueF)%`pIKwObum!p8ewJMn$qWXFtM>EnIoT^I^q|avv#FJp-3(?+GlO1hvR1* z{)O;m#ZlTEef|0^mcQP#hB~uoKWKIBC-5CoBdJ_5+RdM=+iVV) z)$Bh7qQBk*FOY#B27fP2QzO~^+ngQvas?01w8FQUHIn5?3wxdg?Ky?`q*z?!(9zjV zEf5KE2y?se=$XTL%kb_ zBj&hJl1Eqg$Po7#LRxqAUU98{_8J)ecjy@kYCF$`472|jg|~pD11_a5h}_SvPS7Vb zxwh=nAfnWC{+&=5>6m6WHgx?5lG?Kq2k}9LcH=b1fj}$}N3Kch{lb8T017+)`)v~Z zBOUZSlnYn*1@MFQz_pb#-pcz@wzg)Ts`?p-aHDLMM?z1F?}B9N8mhU{?q3L4I-@8k zTw!nz%vcf-Q~Sm`>Zc64nG{nn9>R|3vAHxLhQOZ%c#xk9pPhm5j6n)JX-8kyy`Tqm kLctvyuhn-t1Yd7_%>*b~2!l~kkpJXgC_gWgG5qph0NpVx(f|Me literal 0 HcmV?d00001 From 2dc46a1f8e547127a1d50e66118c4f024061e170 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 01:54:20 -0400 Subject: [PATCH 19/40] Further update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cc0b702df69..590a8a275d1 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,10 @@ Remember to set the `TGI_CENTRAL_ADDRESS` environment variable (ideally for all #### Chat-UI -It is also possible to use the [chat-ui](./clients/chat-ui) to interact with the models. +It is also possible to a simple web [chat-ui](./clients/chat-ui) to interact with models running in your server/cluster. This is a simple fork of [HuggingFace's Chat UI](https://github.com/huggingface/chat-ui) that communicates with the central controller to get the list of models available in the cluster, and then connects to the corresponding servers to generate text. -For example, it TIR, you can access running Chat-UI server with *port forwarding* by running + +For example, it TIR, you can access a running Chat-UI web-server with *port forwarding* by running ```shell ssh tir -L 8888:tir-0-32:4173 From e7bff7bec7c88de3c3f8d25db7bb87a028ad333a Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 01:57:11 -0400 Subject: [PATCH 20/40] Further update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 590a8a275d1..3d023330db5 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,8 @@ and going to `localhost:8888` in your browser. Check the [README](./chat-ui/README.md) for more details. -**Content below is from the original README.** +*Content below is from the original README.* + --- ## Table of contents From 4f433f64600ee385a999843dcb45a8f3e9ec3c53 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 01:58:25 -0400 Subject: [PATCH 21/40] Update README --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3d023330db5..940136ac7da 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,9 @@ This fork was created mainly due to two reasons: *For contributors*: If HuggingFace's upstream has a feature that you want to use, please open an issue first and discuss porting the functionality independently. Do not just copy the code over, as it will be rejected. -### *For LTI/cluster users* +## *For LTI/cluster users* -#### Getting started +### Getting started If you are new to using this library, and as it has being used in your cluster, we recommend by starting with a *client-only* installation, and using models launched by other users. 
@@ -62,7 +62,7 @@ client = Client("http://" + model_addr) print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) ``` -#### Running your own servers +### Running your own servers If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. @@ -81,14 +81,14 @@ conda activate tgi-env make run-llama2-vicuna-7b ``` -#### Setting up a Central server +### Setting up a Central server If you are setting this library for use in your group/cluster for the first time, you will need (or at least benefit) from setting up a central server. See the instructions [in the package folder](./central/README.md). Remember to set the `TGI_CENTRAL_ADDRESS` environment variable (ideally for all the users in your cluster) to the address of the central server. -#### Chat-UI +### Chat-UI It is also possible to a simple web [chat-ui](./clients/chat-ui) to interact with models running in your server/cluster. This is a simple fork of [HuggingFace's Chat UI](https://github.com/huggingface/chat-ui) that communicates with the central controller to get the list of models available in the cluster, and then connects to the corresponding servers to generate text. From c7d8e9daef0c1285c00ee45ec2044ea2ffb90c62 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 17 Aug 2023 02:00:17 -0400 Subject: [PATCH 22/40] Move core picture and remove badges from README --- README.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 940136ac7da..0297e999827 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,7 @@

A Rust, Python and gRPC server for text generation inference. @@ -112,6 +101,8 @@ Check the [README](./chat-ui/README.md) for more details. --- +![image](https://github.com/huggingface/text-generation-inference/assets/3841370/38ba1531-ea0d-4851-b31a-a6d4ddc944b0) + ## Table of contents - [Features](#features) From 355fd66abc42e7be50fb933d49116a711f314f82 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Fri, 1 Sep 2023 19:57:24 +0000 Subject: [PATCH 23/40] Update setup script and minor bug in benchmark --- benchmark/src/generation.rs | 2 +- server/Makefile-flash-att | 2 +- setup_scripts/conda_server.sh | 16 +++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/benchmark/src/generation.rs b/benchmark/src/generation.rs index b57c652b9b4..85f236dcf94 100644 --- a/benchmark/src/generation.rs +++ b/benchmark/src/generation.rs @@ -218,6 +218,6 @@ fn create_sequence(sequence_length: u32, tokenizer: Tokenizer) -> String { encoding.truncate(sequence_length as usize, 0, TruncationDirection::Left); // Decode tokenizer - .decode(Vec::from(encoding.get_ids()), false) + .decode(&Vec::from(encoding.get_ids()), false) .unwrap() } diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att index 268a635a350..d938480e094 100644 --- a/server/Makefile-flash-att +++ b/server/Makefile-flash-att @@ -3,7 +3,7 @@ flash_attention_commit := v2.0.4 flash-attention: # Clone flash attention pip install packaging - git clone git@github.com:Dao-AILab/flash-attention.git + git clone https://github.com/Dao-AILab/flash-attention.git cd flash-attention && git fetch && git checkout $(flash_attention_commit) install-flash-attention: flash-attention diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 4740942a646..7b1e06a17aa 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -13,7 +13,7 @@ N_THREADS=8 # seems un-important, as it only affects BLOOM/NEOX BUILD_EXTENSIONS=false TEST_EXTRA=true -BENCHMARK=true +BENCHMARK=false SERVER_WAIT=180 set -eo pipefail @@ -31,14 +31,14 @@ conda activate ${ENV_NAME} conda install -y -c conda-forge mamba # check if `module` is available and unload gcc and cuda modules -if [ -x "$(command -v module)" ] -then +# if [ -x "$(command -v module)" ] +# then # get list of loaded modules, grep for gcc and unload all gcc modules found # TODO: Fix this, it's not working # For now, unload manually # module list | grep gcc | sed 's/ //g' | sed 's/(gcc)//g' | xargs -I{} module unload {} # module unload "cuda*" -fi +# fi # remove possible extra cuda and gccs from path # (not sure if needed, but added during debugging and kept for now) @@ -49,6 +49,7 @@ export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | gr mamba install -y "gxx<12.0" -c conda-forge mamba install -y -c conda-forge curl git mamba install -y -c conda-forge "rust>=1.65.0" +mamba install -y -c conda-forge openssh mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit # bring in the conda environment variables forward @@ -57,6 +58,9 @@ export LD_LIBRARY_PATH=${CONDA_HOME}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH export PATH=${CONDA_HOME}/envs/${ENV_NAME}/bin:$PATH export CUDA_HOME=${CONDA_HOME}/envs/${ENV_NAME} +# add cargo bin +export PATH=~/.cargo/bin:$PATH + # add protoc export PROTOC_ZIP=protoc-21.12-linux-x86_64.zip mkdir -p /tmp/protoc @@ -95,10 +99,8 @@ rm -rf workdir/* cp Makefile-vllm workdir/Makefile cd workdir && sleep 1 make -j $N_THREADS install-vllm -make test-vllm cd ${DIR}/server if [ "$TEST_EXTRA" = true ] 
; then - make test-vllm python3 vllm_testscript.py fi rm -rf workdir/* @@ -145,7 +147,7 @@ if [ "$BENCHMARK" = true ] ; then OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ make install-benchmark python benchmark/dump_fast_tokenizer.py --tokenizer-name=lmsys/vicuna-7b-v1.5 --output=/tmp/vicuna-7b-v1.5/ - text-generation-benchmark --tokenizer-name=/tmp/vicuna-7b-v1.5 + text-generation-benchmark --tokenizer-name=/tmp/vicuna-7b-v1.5 fi # set default conda environment variables From b9cdcca9da38579cc8264cc99c98efec022970f8 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 7 Sep 2023 18:42:13 -0400 Subject: [PATCH 24/40] merge --- central/src/main.rs | 4 ++++ setup_scripts/conda_server.sh | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/central/src/main.rs b/central/src/main.rs index a1756188517..cc51b453f89 100644 --- a/central/src/main.rs +++ b/central/src/main.rs @@ -20,6 +20,10 @@ struct Args { #[clap(default_value = "60", long, env)] ping_interval: u64, + // The maximum number of failed pings before a model is dropped + #[clap(default_value = "3", long, env)] + max_failed_pings: u32, + // By default is None, if set pings a server on launch and if alive registers it #[clap(default_value = None, long, env)] initial_ping: Option, diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 7b1e06a17aa..4f4cba3ff83 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -3,7 +3,7 @@ # It sidesteps system-wide installations by relying on conda for most packages # and by building openssl from source # TODO: only got it to work with a static build of OpenSSL, which is not ideal -ENV_NAME=tgi-env +ENV_NAME=tgi-env-test # get the directory of this script, and go one up to get the root directory DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" DIR="$(dirname "$DIR")" @@ -25,11 +25,18 @@ then exit 1 fi source ${CONDA_HOME}/etc/profile.d/conda.sh -conda create -y -n ${ENV_NAME} python=3.9 -conda activate ${ENV_NAME} # python can't handle this dependency madness, switch to C++ conda install -y -c conda-forge mamba +mamba create -y -n ${ENV_NAME} python=3.9 +conda activate ${ENV_NAME} + +# remove possible extra cuda and gccs from path +# (not sure if needed, but added during debugging and kept for now) +# export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') +# export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') + + # check if `module` is available and unload gcc and cuda modules # if [ -x "$(command -v module)" ] # then @@ -46,7 +53,7 @@ export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ": export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') # # Install dependencies -mamba install -y "gxx<12.0" -c conda-forge +mamba install -y -c conda-forge "gxx<12.0" mamba install -y -c conda-forge curl git mamba install -y -c conda-forge "rust>=1.65.0" mamba install -y -c conda-forge openssh @@ -153,4 +160,4 @@ fi # set default conda environment variables conda env config vars set LD_LIBRARY_PATH=${LD_LIBRARY_PATH} conda env config vars set PATH=${PATH} -conda env config vars set CUDA_HOME=${CUDA_HOME} \ No newline at end of file +conda env config vars set CUDA_HOME=${CUDA_HOME} From 359150b04b335959aea9c92cac9edf1f48006761 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 7 Sep 2023 
21:49:19 -0400 Subject: [PATCH 25/40] fix coreutils missing --- setup_scripts/conda_server.sh | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 4f4cba3ff83..db909c3d448 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -31,12 +31,6 @@ conda install -y -c conda-forge mamba mamba create -y -n ${ENV_NAME} python=3.9 conda activate ${ENV_NAME} -# remove possible extra cuda and gccs from path -# (not sure if needed, but added during debugging and kept for now) -# export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') -# export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') - - # check if `module` is available and unload gcc and cuda modules # if [ -x "$(command -v module)" ] # then @@ -49,12 +43,12 @@ conda activate ${ENV_NAME} # remove possible extra cuda and gccs from path # (not sure if needed, but added during debugging and kept for now) -export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') -export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') +# export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') +# export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') # # Install dependencies -mamba install -y -c conda-forge "gxx<12.0" -mamba install -y -c conda-forge curl git +mamba install -y -c conda-forge coreutils "gxx<12.0" +mamba install -y -c conda-forge curl git tar mamba install -y -c conda-forge "rust>=1.65.0" mamba install -y -c conda-forge openssh mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit From 1b5e7c06cce32fdf5b54a0827c2c33eab38d76a7 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 14:57:43 -0400 Subject: [PATCH 26/40] Update notebook with async vs sync usage --- notebooks/test_client.ipynb | 166 +++++++++++++++++++++++++++--------- 1 file changed, 128 insertions(+), 38 deletions(-) diff --git a/notebooks/test_client.ipynb b/notebooks/test_client.ipynb index e4c24b3cd2b..0d55d578893 100644 --- a/notebooks/test_client.ipynb +++ b/notebooks/test_client.ipynb @@ -2,47 +2,34 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/media/hdd1/patrick/miniconda3/envs/tgi-env-v2/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import text_generation as tg" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# set environment variable\n", "import os\n", - "os.environ['TGI_CENTRAL_ADDRESS'] = '0.0.0.0:8765'" + "os.environ['TGI_CENTRAL_ADDRESS'] = 'tir-0-32:8765'" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'tg' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m servers \u001b[39m=\u001b[39m tg\u001b[39m.\u001b[39mClient\u001b[39m.\u001b[39mlist_from_central()\n\u001b[1;32m 2\u001b[0m \u001b[39mprint\u001b[39m(servers)\n", - "\u001b[0;31mNameError\u001b[0m: name 'tg' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'upstage/Llama-2-70b-instruct-v2', 'address': 'tir-1-23.eth:9875', 'owner': 'ltjuatja', 'is_quantized': True}, {'name': 'chavinlo/alpaca-native', 'address': 'tir-1-28.eth:8080', 'owner': 'pfernand', 'is_quantized': False}, {'name': 'NousResearch/Llama-2-7b-hf', 'address': 'tir-0-15.eth:8080', 'owner': 'pfernand', 'is_quantized': False}]\n" ] } ], @@ -53,67 +40,170 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "first_server_addr = servers[0]['address']" + "server_addr = servers[1]['address']" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "client = tg.Client(f\"http://{first_server_addr}/")" + "client = tg.Client(f\"http://{server_addr}/")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "among the most successful in the world. Our graduates have gone on to successful careers in academ\n" + ] + } + ], + "source": [ + "print(client.generate(\"CMU's PhD students are\", max_new_tokens=20).generated_text)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " among the most successful in the world. 
Our graduates have gone on to successful careers in academ\n" + ] + } + ], + "source": [ + "text = \"\"\n", + "for response in client.generate_stream(\"CMU's PhD students are\", max_new_tokens=20):\n", + " if not response.token.special:\n", + " text += response.token.text\n", + "print(text)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "client = tg.Client(\"http://0.0.0.0:8080/")" + "# create 4 random sentences\n", + "SAMPLES = [\n", + " \"The quick brown fox jumps over the lazy dog.\",\n", + " \"The five boxing wizards jump quickly.\",\n", + " \"All questions asked by five watch experts amazed the judge.\",\n", + " \"Jack quietly moved up front and seized the big ball of wax.\",\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sync Client" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "expected to be self-motivated and to work independently. nobody is going to hold your hand\n" + "\n", + "The quick brown fox jumps over the lazy dog.\n", + "The quick brown fox j\n", + "\n", + "The first step in the process is to create a list of potential candidates. This list should include\n", + "\n", + "The first time I heard the term “fake news” was in the context of the \n", + "He was a master of disguise, and he had a knack for getting into places he\n", + "CPU times: user 36.8 ms, sys: 3.42 ms, total: 40.2 ms\n", + "Wall time: 1.95 s\n" ] } ], "source": [ - "print(client.generate(\"CMU's PhD students are\", max_new_tokens=20).generated_text)" + "%%time\n", + "for sample in SAMPLES:\n", + " print(client.generate(sample, max_new_tokens=20).generated_text)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Async Client" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "async_client = tg.AsyncClient(f\"http://{server_addr}/")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " expected to be self-motivated and to work independently. nobody is going to hold your hand\n" + "\n", + "The quick brown fox jumps over the lazy dog.\n", + "The quick brown fox j\n", + "\n", + "The first step in the process is to create a list of potential candidates. 
This list should include\n", + "\n", + "The first time I heard the term “fake news” was in the context of the \n", + "He was a master of disguise, and he had a knack for getting into places he\n", + "CPU times: user 105 ms, sys: 5.03 ms, total: 110 ms\n", + "Wall time: 620 ms\n" ] } ], "source": [ - "text = \"\"\n", - "for response in client.generate_stream(\"CMU's PhD students are\", max_new_tokens=20):\n", - " if not response.token.special:\n", - " text += response.token.text\n", - "print(text)" + "%%time\n", + "async def batch_generate():\n", + " return await asyncio.gather(*[async_client.generate(sample, max_new_tokens=20) for sample in SAMPLES])\n", + "\n", + "results = asyncio.run(batch_generate())\n", + "for r in results:\n", + " print(r.generated_text)" ] } ], From 02d53ea1b04b684963df94740da81bb8708a1077 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 21:04:48 -0400 Subject: [PATCH 27/40] Add arguments to install script and try to fix CI --- .github/workflows/build.yaml | 37 -------------- .github/workflows/setup_conda.yml | 22 +++++++++ .github/workflows/tests.yaml | 82 ------------------------------- README.md | 5 +- setup_scripts/conda_server.sh | 78 ++++++++++++++++++++--------- 5 files changed, 79 insertions(+), 145 deletions(-) delete mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/setup_conda.yml delete mode 100644 .github/workflows/tests.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml deleted file mode 100644 index 1b20aeaefd8..00000000000 --- a/.github/workflows/build.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: Build and push docker image to github registry - -on: - push: - branches: - - main - -jobs: - build: - name: Build Images - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - uses: actions/checkout@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2.2.1 - - name: Login to container registry - uses: docker/login-action@v2.1.0 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Downcase repository name - id: downcase - run: | - echo "::set-output name=repository::$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]')" - - name: Build and push Docker image - uses: docker/build-push-action@v3.2.0 - with: - context: . 
- push: true - tags: | - ghcr.io/${{ github.repositor }}:latest - cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:latest - cache-to: type=inline \ No newline at end of file diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml new file mode 100644 index 00000000000..535c64a75d1 --- /dev/null +++ b/.github/workflows/setup_conda.yml @@ -0,0 +1,22 @@ +name: Test Conda Setup + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: 3.9 + + - name: Run Conda Server Setup + shell: bash -l {0} + run: | + bash ./setup_scripts/conda_server.sh \ No newline at end of file diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index 7e5ba52cb5e..00000000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: Server Tests - -on: - pull_request: - paths: - - ".github/workflows/tests.yaml" - - "server/**" - - "proto/**" - - "router/**" - - "launcher/**" - - "Cargo.lock" - - "rust-toolchain.toml" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - run_tests: - runs-on: ubuntu-latest - - env: - SCCACHE_GHA_ENABLED: "on" - RUSTC_WRAPPER: /usr/local/bin/sccache - SCCACHE: 0.3.3 - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: 1.65.0 - override: true - components: rustfmt, clippy - - name: Install Protoc - uses: arduino/setup-protoc@v1 - - name: Install sccache - run: | - curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache - chmod +x /usr/local/bin/sccache - - name: configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - core.exportVariable('SCCACHE_GHA_CACHE_TO', 'sccache-${{runner.os}}-${{github.ref_name}}'); - core.exportVariable('SCCACHE_GHA_CACHE_FROM', 'sccache-${{runner.os}}-main,sccache-${{runner.os}}-'); - - name: cargo registry cache - uses: actions/cache@v3 - with: - key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-${{ github.sha }} - restore-keys: | - cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- - cargo-${{ runner.os }}- - path: | - ~/.cargo/registry - ~/.cargo/git - - name: Install - run: | - make install - - name: Run server tests - run: | - pip install pytest - export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} - pytest -s -vv server/tests - - name: Run Rust fmt - run: | - cargo fmt --check - - name: Run Rust clippy - run: | - cargo clippy - - name: Run Rust tests - run: | - cargo test - - name: sccache stats - run: | - /usr/local/bin/sccache --show-stats diff --git a/README.md b/README.md index 0297e999827..09364d094c4 100644 --- a/README.md +++ b/README.md @@ -55,13 +55,14 @@ print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_tex If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker 
installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. -Set the CONDA_HOME environment variable to the path of your conda installation, and run the following commands: +Then, run the install script: ```shell -export CONDA_HOME=/path/to/conda bash setup_scripts/conda_server.sh ``` +*Note*: if you are running in a cluster with `module` installed, make sure you deactivate all modules before running the script. + This will create a conda environment with all the dependencies needed to run the model servers. You should then be able to launch models with the `text-generation-launcher` command, or by using one of the predefined MAKE rules diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index db909c3d448..36d9dbfea8d 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -3,49 +3,79 @@ # It sidesteps system-wide installations by relying on conda for most packages # and by building openssl from source # TODO: only got it to work with a static build of OpenSSL, which is not ideal -ENV_NAME=tgi-env-test # get the directory of this script, and go one up to get the root directory DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" DIR="$(dirname "$DIR")" -N_THREADS=8 # currently can only build in TIR without extensions # seems un-important, as it only affects BLOOM/NEOX +ENV_NAME=tgi-env BUILD_EXTENSIONS=false TEST_EXTRA=true BENCHMARK=false SERVER_WAIT=180 +N_THREADS=8 + +# Parse command line arguments +while (( "$#" )); do + case "$1" in + --env-name) + ENV_NAME=$2 + shift 2 + ;; + --build-extensions) + BUILD_EXTENSIONS=true + shift 1 + ;; + --no-tests) + TEST_EXTRA=false + shift 1 + ;; + --benchmark) + BENCHMARK=true + shift 1 + ;; + --server-wait) + SERVER_WAIT=$2 + shift 2 + ;; + --n-threads) + N_THREADS=$2 + shift 2 + ;; + --) # end argument parsing + shift + break + ;; + -*|--*=) # unsupported flags + echo "Error: Unsupported flag $1" >&2 + exit 1 + ;; + *) # preserve positional arguments + PARAMS="$PARAMS $1" + shift + ;; + esac +done +# set positional arguments in their proper place +eval set -- "$PARAMS" set -eo pipefail -# check if CONDA_HOME is set and create environment -if [ -z "$CONDA_HOME" ] +# check if CONDA_PREFIX is set and create environment +if [ -z "$CONDA_PREFIX" ] then - echo "Please set CONDA_HOME to the location of your conda installation" + echo "(Mini)conda does not seem to be installed, please install it first or set CONDA_PREFIX appropriately" exit 1 fi -source ${CONDA_HOME}/etc/profile.d/conda.sh +source ${CONDA_PREFIX}/etc/profile.d/conda.sh # python can't handle this dependency madness, switch to C++ conda install -y -c conda-forge mamba +echo "Creating conda environment ${ENV_NAME}..." 
mamba create -y -n ${ENV_NAME} python=3.9 conda activate ${ENV_NAME} -# check if `module` is available and unload gcc and cuda modules -# if [ -x "$(command -v module)" ] -# then - # get list of loaded modules, grep for gcc and unload all gcc modules found - # TODO: Fix this, it's not working - # For now, unload manually - # module list | grep gcc | sed 's/ //g' | sed 's/(gcc)//g' | xargs -I{} module unload {} - # module unload "cuda*" -# fi - -# remove possible extra cuda and gccs from path -# (not sure if needed, but added during debugging and kept for now) -# export PATH=$(echo $PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') -# export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | tr ":" "\n" | grep -v cuda | grep -v gcc | tr "\n" ":" | sed 's/:$//g') - # # Install dependencies mamba install -y -c conda-forge coreutils "gxx<12.0" mamba install -y -c conda-forge curl git tar @@ -55,9 +85,9 @@ mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit # bring in the conda environment variables forward # (not sure if needed, but added during debugging and kept for now) -export LD_LIBRARY_PATH=${CONDA_HOME}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH -export PATH=${CONDA_HOME}/envs/${ENV_NAME}/bin:$PATH -export CUDA_HOME=${CONDA_HOME}/envs/${ENV_NAME} +export LD_LIBRARY_PATH=${CONDA_PREFIX}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH +export PATH=${CONDA_PREFIX}/envs/${ENV_NAME}/bin:$PATH +export CUDA_HOME=${CONDA_PREFIX}/envs/${ENV_NAME} # add cargo bin export PATH=~/.cargo/bin:$PATH From d783fc33d5cc16b0d4e07d707597783519fd96b1 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 21:14:39 -0400 Subject: [PATCH 28/40] Attempt to fix CI --- .github/workflows/setup_conda.yml | 2 ++ README.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index 535c64a75d1..a6c65a67e18 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -13,6 +13,8 @@ jobs: - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 with: + auto-activate-base: true + activate-environment: "" auto-update-conda: true python-version: 3.9 diff --git a/README.md b/README.md index 09364d094c4..5b7f7c4d29f 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_tex If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. 
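Patch 27 above adds a small command-line parser to `setup_scripts/conda_server.sh`. As a rough illustration of how those flags combine, the invocations below are sketches only, assuming the script is run from the repository root with conda already installed; the environment name and thread count are placeholder values.

```shell
# Default install into the tgi-env environment, including the extra tests.
bash setup_scripts/conda_server.sh

# Custom environment name, skip the extra tests, and use more build threads.
bash setup_scripts/conda_server.sh --env-name my-tgi-env --no-tests --n-threads 16

# Also build the optional extensions and run the benchmark at the end.
bash setup_scripts/conda_server.sh --build-extensions --benchmark
```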
-Then, run the install script: +Then, ***from your base environment***, run the install script: ```shell bash setup_scripts/conda_server.sh From 2ea4d7018d04b36cda1c24f270bc8b4800342ab7 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 21:22:22 -0400 Subject: [PATCH 29/40] attempt to fix ci v2 --- .github/workflows/setup_conda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index a6c65a67e18..4dc1a7d7a8b 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -16,7 +16,6 @@ jobs: auto-activate-base: true activate-environment: "" auto-update-conda: true - python-version: 3.9 - name: Run Conda Server Setup shell: bash -l {0} From 261dc6bdcd5ab111d3d3abb9b2d3f59a4efbcdd6 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 21:50:10 -0400 Subject: [PATCH 30/40] Add disk cleaning to CI to try to fix disk space issue. --- .github/workflows/free_disk_space.sh | 47 ++++++++++++++++++++++++++++ .github/workflows/setup_conda.yml | 7 +++-- 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/free_disk_space.sh diff --git a/.github/workflows/free_disk_space.sh b/.github/workflows/free_disk_space.sh new file mode 100644 index 00000000000..e9b64d47293 --- /dev/null +++ b/.github/workflows/free_disk_space.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# +# The Azure provided machines typically have the following disk allocation: +# Total space: 85GB +# Allocated: 67 GB +# Free: 17 GB +# This script frees up 28 GB of disk space by deleting unneeded packages and +# large directories. +# The Flink end to end tests download and generate more than 17 GB of files, +# causing unpredictable behavior and build failures. 
+# +echo "==============================================================================" +echo "Freeing up disk space on CI system" +echo "==============================================================================" + +echo "Listing 100 largest packages" +dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 +df -h +echo "Removing large packages" +sudo apt-get remove -y '^ghc-8.*' +sudo apt-get remove -y '^dotnet-.*' +sudo apt-get remove -y '^llvm-.*' +sudo apt-get remove -y 'php.*' +sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel +sudo apt-get autoremove -y +sudo apt-get clean +df -h +echo "Removing large directories" +# deleting 15GB +rm -rf /usr/share/dotnet/ +df -h \ No newline at end of file diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index 4dc1a7d7a8b..8d809200d1b 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -10,6 +10,10 @@ jobs: - name: Checkout code uses: actions/checkout@v2 + - name: Free up disk space + run: | + bash .github/workflows/free_disk_space.sh + - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 with: @@ -18,6 +22,5 @@ jobs: auto-update-conda: true - name: Run Conda Server Setup - shell: bash -l {0} run: | - bash ./setup_scripts/conda_server.sh \ No newline at end of file + bash ./setup_scripts/conda_server.sh --no-tests \ No newline at end of file From e117bcd01a7295c7632a4944dc4135ebf3b72cc5 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 21:54:31 -0400 Subject: [PATCH 31/40] attempt to fix by reorder --- .github/workflows/setup_conda.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index 8d809200d1b..d71125115de 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -10,10 +10,6 @@ jobs: - name: Checkout code uses: actions/checkout@v2 - - name: Free up disk space - run: | - bash .github/workflows/free_disk_space.sh - - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 with: @@ -21,6 +17,10 @@ jobs: activate-environment: "" auto-update-conda: true + - name: Free up disk space + run: | + bash .github/workflows/free_disk_space.sh + - name: Run Conda Server Setup run: | bash ./setup_scripts/conda_server.sh --no-tests \ No newline at end of file From b7d9db89ea3a5e39977194c5778f27a53c44abac Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 22:01:24 -0400 Subject: [PATCH 32/40] attempt to fix --- .github/workflows/setup_conda.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index d71125115de..1f1b59c1e13 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -10,6 +10,10 @@ jobs: - name: Checkout code uses: actions/checkout@v2 + # - name: Free up disk space + # run: | + # bash .github/workflows/free_disk_space.sh + - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 with: @@ -17,10 +21,6 @@ jobs: activate-environment: "" auto-update-conda: true - - name: Free up disk space - run: | - bash .github/workflows/free_disk_space.sh - - name: Run Conda Server Setup run: | bash ./setup_scripts/conda_server.sh --no-tests \ No newline at end of file From 199a0f50dad5540fb9c8582dd980244ab2790aa9 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 19 Sep 2023 22:10:24 -0400 
Subject: [PATCH 33/40] attempt to fix --- .github/workflows/setup_conda.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index 1f1b59c1e13..d7b4a4c5e97 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -10,9 +10,9 @@ jobs: - name: Checkout code uses: actions/checkout@v2 - # - name: Free up disk space - # run: | - # bash .github/workflows/free_disk_space.sh + - name: Free up disk space + run: | + bash .github/workflows/free_disk_space.sh - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 @@ -22,5 +22,6 @@ jobs: auto-update-conda: true - name: Run Conda Server Setup + shell: bash -l {0} run: | bash ./setup_scripts/conda_server.sh --no-tests \ No newline at end of file From 6fcc036c6a2a6d974d01403b3cb137043d5d5cad Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Wed, 20 Sep 2023 10:55:55 -0400 Subject: [PATCH 34/40] fix small bug with CONDA env var --- setup_scripts/conda_server.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 36d9dbfea8d..f79c9bb83a2 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -68,7 +68,8 @@ then echo "(Mini)conda does not seem to be installed, please install it first or set CONDA_PREFIX appropriately" exit 1 fi -source ${CONDA_PREFIX}/etc/profile.d/conda.sh +export CONDA_HOME=$CONDA_PREFIX +source ${CONDA_HOME}/etc/profile.d/conda.sh # python can't handle this dependency madness, switch to C++ conda install -y -c conda-forge mamba @@ -85,9 +86,9 @@ mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit # bring in the conda environment variables forward # (not sure if needed, but added during debugging and kept for now) -export LD_LIBRARY_PATH=${CONDA_PREFIX}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH -export PATH=${CONDA_PREFIX}/envs/${ENV_NAME}/bin:$PATH -export CUDA_HOME=${CONDA_PREFIX}/envs/${ENV_NAME} +export LD_LIBRARY_PATH=${CONDA_HOME}/envs/${ENV_NAME}/lib:$LD_LIBRARY_PATH +export PATH=${CONDA_HOME}/envs/${ENV_NAME}/bin:$PATH +export CUDA_HOME=${CONDA_HOME}/envs/${ENV_NAME} # add cargo bin export PATH=~/.cargo/bin:$PATH From a597e5094d78cb9815dc6f6a97739d3a5eb452c2 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Fri, 22 Sep 2023 18:34:13 +0100 Subject: [PATCH 35/40] attempt to fix ci v5 --- .github/workflows/free_disk_space.sh | 4 +++- .github/workflows/setup_conda.yml | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/free_disk_space.sh b/.github/workflows/free_disk_space.sh index e9b64d47293..416884b6a4b 100644 --- a/.github/workflows/free_disk_space.sh +++ b/.github/workflows/free_disk_space.sh @@ -37,7 +37,9 @@ sudo apt-get remove -y '^ghc-8.*' sudo apt-get remove -y '^dotnet-.*' sudo apt-get remove -y '^llvm-.*' sudo apt-get remove -y 'php.*' -sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel +sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox microsoft-edge-stable powershell mono-devel +sudo apt-get remove -y '^gcc-.*' +sudo apt-get remove -y '^g++-.*' sudo apt-get autoremove -y sudo apt-get clean df -h diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index d7b4a4c5e97..853c439900c 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -10,10 +10,6 @@ jobs: - name: Checkout 
code uses: actions/checkout@v2 - - name: Free up disk space - run: | - bash .github/workflows/free_disk_space.sh - - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 with: @@ -21,6 +17,10 @@ jobs: activate-environment: "" auto-update-conda: true + - name: Free up disk space + run: | + bash .github/workflows/free_disk_space.sh + - name: Run Conda Server Setup shell: bash -l {0} run: | From 8d0ff33974639110e1c13ae307b501e0d0e4bd25 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Mon, 25 Sep 2023 14:17:49 -0400 Subject: [PATCH 36/40] add option for light installation to fit into GH --- .github/workflows/setup_conda.yml | 2 +- setup_scripts/conda_server.sh | 54 +++++++++++++++++++------------ 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/.github/workflows/setup_conda.yml b/.github/workflows/setup_conda.yml index 853c439900c..816b9fd6db1 100644 --- a/.github/workflows/setup_conda.yml +++ b/.github/workflows/setup_conda.yml @@ -24,4 +24,4 @@ jobs: - name: Run Conda Server Setup shell: bash -l {0} run: | - bash ./setup_scripts/conda_server.sh --no-tests \ No newline at end of file + bash ./setup_scripts/conda_server.sh --light-mode --no-tests \ No newline at end of file diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index f79c9bb83a2..002f2ee3789 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -11,6 +11,8 @@ DIR="$(dirname "$DIR")" # seems un-important, as it only affects BLOOM/NEOX ENV_NAME=tgi-env BUILD_EXTENSIONS=false +BUILD_VLLM=true +BUILD_FLASHATTN=true TEST_EXTRA=true BENCHMARK=false SERVER_WAIT=180 @@ -27,6 +29,11 @@ while (( "$#" )); do BUILD_EXTENSIONS=true shift 1 ;; + --light-mode) + BUILD_VLLM=false + BUILD_FLASHATTN=false + shift 1 + ;; --no-tests) TEST_EXTRA=false shift 1 @@ -36,13 +43,13 @@ while (( "$#" )); do shift 1 ;; --server-wait) - SERVER_WAIT=$2 - shift 2 - ;; + SERVER_WAIT=$2 + shift 2 + ;; --n-threads) - N_THREADS=$2 - shift 2 - ;; + N_THREADS=$2 + shift 2 + ;; --) # end argument parsing shift break @@ -125,17 +132,19 @@ export PATH=${DIR}/.openssl/bin:$PATH pip install ninja cd ${DIR}/server mkdir -p workdir +rm -rf workdir/* # install vllm -rm -rf workdir/* -cp Makefile-vllm workdir/Makefile -cd workdir && sleep 1 -make -j $N_THREADS install-vllm -cd ${DIR}/server -if [ "$TEST_EXTRA" = true ] ; then - python3 vllm_testscript.py +if [ "$BUILD_VLLM" = true ] ; then + cp Makefile-vllm workdir/Makefile + cd workdir && sleep 1 + make -j $N_THREADS install-vllm + cd ${DIR}/server + if [ "$TEST_EXTRA" = true ] ; then + python3 vllm_testscript.py + fi + rm -rf workdir/* fi -rm -rf workdir/* # install base package cd ${DIR} @@ -147,14 +156,17 @@ BUILD_EXTENSIONS=$BUILD_EXTENSIONS \ # install flash attention -cd ${DIR}/server -cp Makefile-flash-att workdir/Makefile -cd workdir && sleep 1 -make -j $N_THREADS install-flash-attention -if [ "$TEST_EXTRA" = true ] ; then - make test-flash-attention +if [ "$BUILD_FLASHATTN" = true ] ; then + cd ${DIR}/server + cp Makefile-flash-att workdir/Makefile + cd workdir && sleep 1 + make -j $N_THREADS install-flash-attention + if [ "$TEST_EXTRA" = true ] ; then + make test-flash-attention + fi + cd ${DIR}/server fi -cd ${DIR}/server + rm -rf workdir # override protobuf From bb7fbfb4109b73c6ba70876a37410350c320a93c Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Thu, 26 Oct 2023 11:32:52 -0400 Subject: [PATCH 37/40] Fix issue with installation by upgrading vLLM. 
Update README to reflect change from TIR to Babel --- README.md | 6 +++--- server/Makefile-vllm | 2 +- .../models/custom_modeling/flash_llama_modeling.py | 2 +- .../models/custom_modeling/flash_neox_modeling.py | 2 +- .../models/custom_modeling/flash_rw_modeling.py | 4 ++-- .../custom_modeling/flash_santacoder_modeling.py | 2 +- setup_scripts/conda_server.sh | 14 +++++++++++--- 7 files changed, 20 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 5b7f7c4d29f..eafec6d4016 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ If you are new to using this library, and as it has being used in your cluster, To start, the `TGI_CENTRAL_ADDRESS` needs to be set, so that the client can know which servers to connect to. For example, in the LTI cluster, run ```shell -echo "export TGI_CENTRAL_ADDRESS=tir-0-32:8765" >> ~/.bashrc # if using a single machine, use `0.0.0.0:8765` instead +echo "export TGI_CENTRAL_ADDRESS=babel-3-36:8765" >> ~/.bashrc # if using a single machine, use `0.0.0.0:8765` instead source ~/.bashrc ``` @@ -83,10 +83,10 @@ Remember to set the `TGI_CENTRAL_ADDRESS` environment variable (ideally for all It is also possible to a simple web [chat-ui](./clients/chat-ui) to interact with models running in your server/cluster. This is a simple fork of [HuggingFace's Chat UI](https://github.com/huggingface/chat-ui) that communicates with the central controller to get the list of models available in the cluster, and then connects to the corresponding servers to generate text. -For example, it TIR, you can access a running Chat-UI web-server with *port forwarding* by running +For example, in Babel, you can access a running Chat-UI web-server with *port forwarding* by running ```shell -ssh tir -L 8888:tir-0-32:4173 +ssh babel -L 8888:babel-3-36:4173 ``` and going to `localhost:8888` in your browser. 
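
For completeness, the client-side workflow that the README hunk above describes comes down to a few lines of Python. This is a minimal sketch, assuming the central controller is reachable through `TGI_CENTRAL_ADDRESS` and that `model_addr` (a placeholder below) was taken from the controller's model list; neither value comes from this patch.

```python
import os

from text_generation import Client

# The README above assumes TGI_CENTRAL_ADDRESS points at the central controller;
# "babel-3-36:8765" is the cluster-specific example it uses.
os.environ.setdefault("TGI_CENTRAL_ADDRESS", "babel-3-36:8765")

# Placeholder server address: in practice it would be taken from the central
# controller's model list (that discovery call is not part of this hunk).
model_addr = "babel-1-28:8080"

client = Client("http://" + model_addr)
print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text)

# The client also supports token-by-token streaming.
for event in client.generate_stream("What is Deep Learning?", max_new_tokens=20):
    print(event.token.text, end="", flush=True)
```
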
diff --git a/server/Makefile-vllm b/server/Makefile-vllm index a223a45546b..1621c481571 100644 --- a/server/Makefile-vllm +++ b/server/Makefile-vllm @@ -1,4 +1,4 @@ -vllm_commit := "96853af" +vllm_commit := "3d40c83" vllm: # Clone vllm diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index cd87baededb..d528178259e 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -272,7 +272,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm.attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.paged_attention_v1( attn_output, query, kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index 6dac3e26b6c..6d524d308f0 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -164,7 +164,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm.attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.paged_attention_v1( attn_output, qkv[:, 0], kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py index 0c371665f3b..fc731a95cb5 100644 --- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py @@ -214,7 +214,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_heads_kv, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm.attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.paged_attention_v1( attn_output, query, kv_cache[0], @@ -334,7 +334,7 @@ def forward( else: # kv_cache[1] => [num_blocks, num_groups, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm.attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.paged_attention_v1( attn_output, query, kv_cache[0], diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py index f7f93c4e1a0..4b587752d7e 100644 --- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py @@ -281,7 +281,7 @@ def forward( else: # kv_cache[1] => [num_blocks, 1, head_size, block_size] block_size = kv_cache[1].shape[3] - vllm.attention_ops.single_query_cached_kv_attention( + vllm.attention_ops.paged_attention_v1( attn_output, query, kv_cache[0], diff --git a/setup_scripts/conda_server.sh b/setup_scripts/conda_server.sh index 002f2ee3789..2da4e053e16 100644 --- a/setup_scripts/conda_server.sh +++ b/setup_scripts/conda_server.sh @@ -16,7 +16,7 @@ BUILD_FLASHATTN=true TEST_EXTRA=true BENCHMARK=false SERVER_WAIT=180 -N_THREADS=8 +N_THREADS=4 # Parse command line arguments while (( "$#" )); do @@ -77,8 +77,11 @@ then fi export CONDA_HOME=$CONDA_PREFIX source ${CONDA_HOME}/etc/profile.d/conda.sh + # python can't handle this dependency 
madness, switch to C++ conda install -y -c conda-forge mamba +# we need to add the base path to get mamba to work inside the new environment +export PATH=${CONDA_HOME}/bin:$PATH echo "Creating conda environment ${ENV_NAME}..." mamba create -y -n ${ENV_NAME} python=3.9 @@ -90,6 +93,8 @@ mamba install -y -c conda-forge curl git tar mamba install -y -c conda-forge "rust>=1.65.0" mamba install -y -c conda-forge openssh mamba install -y -c "nvidia/label/cuda-11.8.0" cuda-toolkit +# pin pytorch due to some cuda-issue in pytorch==2.1.0 / something with vllm +mamba install -y -c pytorch -c nvidia pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.8 # bring in the conda environment variables forward # (not sure if needed, but added during debugging and kept for now) @@ -130,6 +135,8 @@ export PATH=${DIR}/.openssl/bin:$PATH # install ninja for faster compilation of CUDA kernels and setup workdir pip install ninja +# export MAX_JOBS to limit ninjas parallelism +export MAX_JOBS=$N_THREADS cd ${DIR}/server mkdir -p workdir rm -rf workdir/* @@ -138,7 +145,8 @@ rm -rf workdir/* if [ "$BUILD_VLLM" = true ] ; then cp Makefile-vllm workdir/Makefile cd workdir && sleep 1 - make -j $N_THREADS install-vllm + make install-vllm + cd ${DIR}/server if [ "$TEST_EXTRA" = true ] ; then python3 vllm_testscript.py @@ -160,7 +168,7 @@ if [ "$BUILD_FLASHATTN" = true ] ; then cd ${DIR}/server cp Makefile-flash-att workdir/Makefile cd workdir && sleep 1 - make -j $N_THREADS install-flash-attention + make install-flash-attention if [ "$TEST_EXTRA" = true ] ; then make test-flash-attention fi From 012d30fd6b736bf5b943a5b2ca82712ea4f187d2 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Fri, 27 Oct 2023 08:53:35 -0400 Subject: [PATCH 38/40] Update README.md Add a note that install takes a long time. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index eafec6d4016..66fccfa381f 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ Then, ***from your base environment***, run the install script: bash setup_scripts/conda_server.sh ``` +*Note*: This **takes a really long time**, up to 1.5-3 hour, sit back and realx while you wait for it. + *Note*: if you are running in a cluster with `module` installed, make sure you deactivate all modules before running the script. This will create a conda environment with all the dependencies needed to run the model servers. 
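
Because `setup_scripts/conda_server.sh` now pins `pytorch==2.0.1` against CUDA 11.8 and only builds vLLM and flash-attention outside of `--light-mode`, a short sanity check after the long install can catch a broken environment early. A minimal sketch follows; the expected versions simply mirror the pins in the script above and should be adjusted if those pins change.

```python
# Post-install sanity check, meant to be run inside the tgi-env environment
# created by setup_scripts/conda_server.sh.
import torch

print("torch:", torch.__version__)              # pinned to 2.0.1 by the setup script
print("torch CUDA build:", torch.version.cuda)  # expected 11.8
print("CUDA available:", torch.cuda.is_available())

# vLLM and flash-attention are skipped in --light-mode, so treat them as optional.
for module_name in ("vllm", "flash_attn"):
    try:
        __import__(module_name)
        print(f"{module_name}: importable")
    except ImportError:
        print(f"{module_name}: missing (expected if installed with --light-mode)")
```
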
From 850556a99454b54b66e8c7a416024cb484a5337d Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 7 Nov 2023 17:02:11 +0000 Subject: [PATCH 39/40] Upgrade HF's transformers and add option to extract top tokens at each decoding step (#4) * update HF and add draft top_p tokens * Add top_tokens to python client * Add toptokens to non-flash causal LM * Update test notebook and README --- README.md | 13 ++++ clients/python/text_generation/client.py | 35 ++++++++++ clients/python/text_generation/types.py | 13 ++++ launcher/src/main.rs | 8 +++ notebooks/test_client.ipynb | 69 +++++++++++++++---- proto/generate.proto | 15 ++++ router/client/src/client.rs | 1 + router/src/health.rs | 1 + router/src/infer.rs | 42 ++++++++++- router/src/lib.rs | 10 +++ router/src/main.rs | 4 ++ router/src/queue.rs | 2 + router/src/server.rs | 12 +++- router/src/validation.rs | 36 ++++++++-- server/pyproject.toml | 13 ++-- server/text_generation_server/cli.py | 18 +++-- .../models/causal_lm.py | 58 ++++++++++++++++ .../models/flash_causal_lm.py | 58 +++++++++++++++- .../models/seq2seq_lm.py | 57 +++++++++++++++ server/text_generation_server/models/types.py | 21 ++++++ .../text_generation_server/utils/convert.py | 12 +++- server/text_generation_server/utils/tokens.py | 64 +++++++++++++++-- 22 files changed, 516 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 66fccfa381f..0d459b75f53 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,19 @@ client = Client("http://" + model_addr) print(client.generate("What is Deep Learning?", max_new_tokens=20).generated_text) ``` +#### Updating the environment + +In general, you don't have to recreate the environment every time you want to update the library. +To just update the library, run in the base directory (in a previously created environment) + +```shell +OPENSSL_DIR=.openssl \ +OPENSSL_LIB_DIR=.openssl/lib \ +OPENSSL_INCLUDE_DIR=.openssl/include \ +BUILD_EXTENSIONS=false \ + make install +``` + ### Running your own servers If you are an LTI student using one of its cluster (or generally belong to an academic cluster that doesn't have docker installed), you can side-steps problems with installing system dependencies by using the [(mini)conda](https://docs.conda.io/en/latest/miniconda.html) package manager. 
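
The hunks that follow add a `top_tokens` parameter and a small `score` helper to the Python client. A minimal usage sketch, assuming a server is already running at the placeholder address `model_addr`; it mirrors the notebook example later in this patch.

```python
from text_generation import Client

model_addr = "babel-1-28:8080"  # placeholder; use an address from the central controller
client = Client("http://" + model_addr)

# Request the 3 most likely tokens at every decoding step in addition to the sampled one.
resp = client.generate("CMU's PhD students are", max_new_tokens=4, top_tokens=3)
print(resp.generated_text)
for step, candidates in enumerate(resp.details.top_tokens):
    print(step, [(t.text, round(t.logprob, 2)) for t in candidates])

# score() wraps generate(..., max_new_tokens=1, decoder_input_details=True) and returns
# the prefill tokens of the prompt (minus the first one, which has no logprob), so
# summing their logprobs gives the log-likelihood of the target string under the model.
prefill = client.score("Deep learning is a subfield of machine learning.")
print("target logprob:", sum(t.logprob for t in prefill))
```

Since `score` issues a regular generation request with `max_new_tokens=1`, its cost is essentially one forward pass over the prompt.
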
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py index c5f8166484d..486b4d7775b 100644 --- a/clients/python/text_generation/client.py +++ b/clients/python/text_generation/client.py @@ -106,6 +106,7 @@ def generate( typical_p: Optional[float] = None, watermark: bool = False, decoder_input_details: bool = False, + top_tokens: Optional[int] = None, ) -> Response: """ Given a prompt, generate the following text @@ -144,6 +145,8 @@ def generate( Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) decoder_input_details (`bool`): Return the decoder input token logprobs and ids + top_tokens (`Optional[int]`): + Return the top `top_tokens` tokens with the highest logprobs at each step Returns: Response: generated response @@ -165,6 +168,7 @@ def generate( typical_p=typical_p, watermark=watermark, decoder_input_details=decoder_input_details, + top_tokens=top_tokens, ) request = Request(inputs=prompt, stream=False, parameters=parameters) @@ -179,6 +183,25 @@ def generate( if resp.status_code != 200: raise parse_error(resp.status_code, payload) return Response(**payload[0]) + + def score( + self: str, + target: str, + ): + """ Utility function to score a target string (i.e. compute its logprob). + + Mostly wraps the generate function, asking for 1 new token and returning + the logprob of the prompt. + """ + # Use generate to get the score + resp = self.generate( + prompt=target, + do_sample=False, + max_new_tokens=1, + decoder_input_details=True, + ) + # extract prefill details and cut off first + return resp.details.prefill[1:] def generate_stream( self, @@ -195,6 +218,7 @@ def generate_stream( truncate: Optional[int] = None, typical_p: Optional[float] = None, watermark: bool = False, + top_tokens: Optional[int] = None, ) -> Iterator[StreamResponse]: """ Given a prompt, generate the following stream of tokens @@ -229,6 +253,8 @@ def generate_stream( See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information watermark (`bool`): Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + top_tokens (`Optional[int]`): + Return the top `top_tokens` tokens with the highest logprobs at each step Returns: Iterator[StreamResponse]: stream of generated tokens @@ -250,6 +276,7 @@ def generate_stream( truncate=truncate, typical_p=typical_p, watermark=watermark, + top_tokens=top_tokens, ) request = Request(inputs=prompt, stream=True, parameters=parameters) @@ -348,6 +375,7 @@ async def generate( typical_p: Optional[float] = None, watermark: bool = False, decoder_input_details: bool = False, + top_tokens: Optional[int] = None, ) -> Response: """ Given a prompt, generate the following text asynchronously @@ -386,6 +414,8 @@ async def generate( Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) decoder_input_details (`bool`): Return the decoder input token logprobs and ids + top_tokens (`Optional[int]`): + Return the top `top_tokens` tokens with the highest logprobs at each step Returns: Response: generated response @@ -435,6 +465,8 @@ async def generate_stream( truncate: Optional[int] = None, typical_p: Optional[float] = None, watermark: bool = False, + top_tokens: Optional[int] = None, + ) -> AsyncIterator[StreamResponse]: """ Given a prompt, generate the following stream of tokens asynchronously @@ -469,6 +501,8 @@ async def generate_stream( See [Typical Decoding for Natural Language 
Generation](https://arxiv.org/abs/2202.00666) for more information watermark (`bool`): Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + top_tokens (`Optional[int]`): + Return the top `top_tokens` tokens with the highest logprobs at each step Returns: AsyncIterator[StreamResponse]: stream of generated tokens @@ -490,6 +524,7 @@ async def generate_stream( truncate=truncate, typical_p=typical_p, watermark=watermark, + top_tokens=top_tokens, ) request = Request(inputs=prompt, stream=True, parameters=parameters) diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py index 548f0b639ce..d15bc82378e 100644 --- a/clients/python/text_generation/types.py +++ b/clients/python/text_generation/types.py @@ -33,6 +33,8 @@ class Parameters(BaseModel): typical_p: Optional[float] # Generate best_of sequences and return the one if the highest token logprobs best_of: Optional[int] + # Return the `top_tokens` most likely tokens at each step + top_tokens: Optional[int] # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) watermark: bool = False # Get generation details @@ -100,6 +102,13 @@ def valid_typical_p(cls, v): if v is not None and (v <= 0 or v >= 1.0): raise ValidationError("`typical_p` must be > 0.0 and < 1.0") return v + + @validator("top_tokens") + def valid_top_tokens(cls, v): + if v is not None and v <= 0: + raise ValidationError("`top_tokens` must be strictly positive") + return v + class Request(BaseModel): @@ -193,6 +202,8 @@ class Details(BaseModel): prefill: List[InputToken] # Generated tokens tokens: List[Token] + # Most likely tokens at each step + top_tokens: Optional[List[List[Token]]] # Additional sequences when using the `best_of` parameter best_of_sequences: Optional[List[BestOfSequence]] @@ -219,6 +230,8 @@ class StreamDetails(BaseModel): class StreamResponse(BaseModel): # Generated token token: Token + # Most likely tokens at each step + top_tokens: Optional[List[Token]] # Complete generated text # Only available when the generation is finished generated_text: Optional[str] diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 28bdddb5233..ad9379bc9f3 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -133,6 +133,12 @@ struct Args { #[clap(default_value = "2", long, env)] max_best_of: usize, + // This is the maximum allowed value for clients to set `top_tokens`. + // It is used to return the `top_tokens` most likely tokens at each generation + // rather than just the top one. + #[clap(default_value = "10", long, env)] + max_top_tokens: u32, + /// This is the maximum allowed value for clients to set `stop_sequences`. 
/// Stop sequences are used to allow the model to stop on more than just /// the EOS token, and enable more complex "prompting" where users can preprompt @@ -867,6 +873,8 @@ fn spawn_webserver( args.max_concurrent_requests.to_string(), "--max-best-of".to_string(), args.max_best_of.to_string(), + "--max-top-tokens".to_string(), + args.max_top_tokens.to_string(), "--max-stop-sequences".to_string(), args.max_stop_sequences.to_string(), "--max-input-length".to_string(), diff --git a/notebooks/test_client.ipynb b/notebooks/test_client.ipynb index 0d55d578893..f828d91b091 100644 --- a/notebooks/test_client.ipynb +++ b/notebooks/test_client.ipynb @@ -2,34 +2,43 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/mnt/data_2/patrick/conda/envs/tgi-env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import text_generation as tg" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# set environment variable\n", "import os\n", - "os.environ['TGI_CENTRAL_ADDRESS'] = 'tir-0-32:8765'" + "os.environ['TGI_CENTRAL_ADDRESS'] = 'localhost:8765'" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'upstage/Llama-2-70b-instruct-v2', 'address': 'tir-1-23.eth:9875', 'owner': 'ltjuatja', 'is_quantized': True}, {'name': 'chavinlo/alpaca-native', 'address': 'tir-1-28.eth:8080', 'owner': 'pfernand', 'is_quantized': False}, {'name': 'NousResearch/Llama-2-7b-hf', 'address': 'tir-0-15.eth:8080', 'owner': 'pfernand', 'is_quantized': False}]\n" + "[{'name': '/mnt/data_2/patrick/croissantllm-models/small4_equals/', 'address': 'frightened-frank-flowers-fin-01:3000', 'owner': 'patrick', 'is_quantized': False}]\n" ] } ], @@ -40,16 +49,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "server_addr = servers[1]['address']" + "server_addr = servers[0]['address']" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -58,14 +67,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "among the most successful in the world. Our graduates have gone on to successful careers in academ\n" + "among the best in the country in their field of study. They are also among the best in the\n" ] } ], @@ -75,14 +84,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " among the most successful in the world. Our graduates have gone on to successful careers in academ\n" + " among the best in the country in their field of study. 
They are also among the best in the\n" ] } ], @@ -94,6 +103,38 @@ "print(text)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Top K tokens at each step" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "among the best in\n", + "[Token(id=5684, text='among', logprob=-2.5429688, special=False), Token(id=4645, text='working', logprob=-3.4179688, special=False), Token(id=1135, text='the', logprob=-3.6757812, special=False)]\n", + "[Token(id=1135, text='the', logprob=-0.40478516, special=False), Token(id=3108, text='those', logprob=-2.7402344, special=False), Token(id=488, text='', logprob=-2.8417969, special=False)]\n", + "[Token(id=3284, text='best', logprob=-1.5273438, special=False), Token(id=2481, text='most', logprob=-1.5664062, special=False), Token(id=3263, text='top', logprob=-2.2148438, special=False)]\n", + "[Token(id=1147, text='in', logprob=-0.45898438, special=False), Token(id=1171, text='and', logprob=-3.7089844, special=False), Token(id=5208, text='students', logprob=-3.9511719, special=False)]\n" + ] + } + ], + "source": [ + "resp = client.generate(\"CMU's PhD students are\", max_new_tokens=4, top_tokens=3)\n", + "print(resp.generated_text)\n", + "for top_tokens in resp.details.top_tokens:\n", + " print(top_tokens)" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -223,7 +264,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.9.18" }, "orig_nbformat": 4 }, diff --git a/proto/generate.proto b/proto/generate.proto index 57d79bcaf27..157598cf304 100644 --- a/proto/generate.proto +++ b/proto/generate.proto @@ -91,6 +91,8 @@ message Request { StoppingCriteriaParameters stopping_parameters = 5; /// Return prefill logprobs bool prefill_logprobs = 6; + /// Return most likely n tokens + uint32 top_tokens = 7; } message Batch { @@ -141,6 +143,17 @@ message PrefillTokens { repeated string texts = 3; } +message TopTokens { + /// Top Token IDs + repeated uint32 ids = 1; + /// Top Logprobs + repeated float logprobs = 2; + /// Top Token Texts + repeated string texts = 3; + /// If the tokens are special + repeated bool is_special = 6; +} + message Generation { /// Request ID uint64 request_id = 1; @@ -156,6 +169,8 @@ message Generation { bool token_is_special = 6; /// Complete generated text optional GeneratedText generated_text = 7; + // Top tokens + TopTokens top_tokens = 8; } message FilterBatchRequest { diff --git a/router/client/src/client.rs b/router/client/src/client.rs index 7753f307c0a..10212adb6f1 100644 --- a/router/client/src/client.rs +++ b/router/client/src/client.rs @@ -131,6 +131,7 @@ impl Client { ignore_eos_token: false, }), prefill_logprobs: true, + top_tokens: 20, }); n_tokens += max_input_length; } diff --git a/router/src/health.rs b/router/src/health.rs index a3cacdcd016..972dfe486fa 100644 --- a/router/src/health.rs +++ b/router/src/health.rs @@ -50,6 +50,7 @@ impl Health { stop_sequences: vec![], ignore_eos_token: false, }), + top_tokens: 0, }; let batch = Batch { id: BATCH_ID, diff --git a/router/src/infer.rs b/router/src/infer.rs index 188ddc6420c..079b28e1a2c 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -138,12 +138,15 @@ impl Infer { &self, request: GenerateRequest, ) -> Result { + let use_top_tokens = request.parameters.top_tokens.is_some_and(|x| x > 0); + // Create stream and keep semaphore permit as 
long as generate lives let (_permit, mut stream) = self.generate_stream(request).await?; // Return values let mut result_prefill = Vec::new(); let mut result_tokens = Vec::new(); + let mut result_top_tokens = Vec::new(); let mut result_generated_text = None; let mut result_start = None; let mut result_queued = None; @@ -164,7 +167,10 @@ impl Infer { .collect(); } // Push last token - InferStreamResponse::Token(token) => result_tokens.push(token), + InferStreamResponse::Intermediate {token, top_tokens} => { + result_tokens.push(token); + result_top_tokens.push(top_tokens); + } // Final message // Set return values InferStreamResponse::End { @@ -172,8 +178,10 @@ impl Infer { generated_text, start, queued, + top_tokens } => { result_tokens.push(token); + result_top_tokens.push(top_tokens); result_generated_text = Some(generated_text); result_start = Some(start); result_queued = Some(queued) @@ -185,12 +193,16 @@ impl Infer { if let (Some(generated_text), Some(queued), Some(start)) = (result_generated_text, result_queued, result_start) { + let top_tokens = if use_top_tokens + {result_top_tokens} else + {Vec::new()}; Ok(InferResponse { prefill: result_prefill, tokens: result_tokens, generated_text, queued, start, + top_tokens, }) } else { let err = InferError::IncompleteGeneration; @@ -520,6 +532,24 @@ fn send_responses( special: generation.token_is_special, }; + let mut top_tokens = Vec::new(); + if let Some(top_tokens_) = generation.top_tokens { + top_tokens.extend( + top_tokens_ + .ids + .into_iter() + .zip(top_tokens_.logprobs.into_iter()) + .zip(top_tokens_.texts.into_iter()) + .zip(top_tokens_.is_special.into_iter()) + .map(|(((id, logprob), text), special)| Token { + id, + text, + logprob, + special, + }) + ) + } + if let Some(generated_text) = generation.generated_text { // Generation has ended stopped = true; @@ -527,6 +557,7 @@ fn send_responses( entry.response_tx.send_timeout( Ok(InferStreamResponse::End { token, + top_tokens, generated_text, queued: entry.queue_time, start: entry.batch_time.unwrap(), @@ -536,7 +567,7 @@ fn send_responses( } else { // Send message entry.response_tx.send_timeout( - Ok(InferStreamResponse::Token(token)), + Ok(InferStreamResponse::Intermediate { token, top_tokens }), Duration::from_millis(10), )?; } @@ -566,10 +597,14 @@ pub(crate) enum InferStreamResponse { // Optional first message Prefill(PrefillTokens), // Intermediate messages - Token(Token), + Intermediate { + token: Token, + top_tokens: Vec, + }, // Last message End { token: Token, + top_tokens: Vec, generated_text: GeneratedText, start: Instant, queued: Instant, @@ -583,6 +618,7 @@ pub(crate) struct InferResponse { pub(crate) generated_text: GeneratedText, pub(crate) queued: Instant, pub(crate) start: Instant, + pub(crate) top_tokens: Vec>, } #[derive(Debug, Error)] diff --git a/router/src/lib.rs b/router/src/lib.rs index 7dff7a114ec..a948cf444b1 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -67,6 +67,9 @@ pub(crate) struct GenerateParameters { #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)] pub best_of: Option, #[serde(default)] + #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)] + pub top_tokens: Option, + #[serde(default)] #[schema( exclusive_minimum = 0.0, nullable = true, @@ -144,6 +147,7 @@ fn default_max_new_tokens() -> u32 { fn default_parameters() -> GenerateParameters { GenerateParameters { best_of: None, + top_tokens: None, temperature: None, repetition_penalty: None, top_k: None, @@ -235,6 +239,8 @@ 
pub(crate) struct BestOfSequence { pub seed: Option, pub prefill: Vec, pub tokens: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub top_tokens: Vec>, } #[derive(Serialize, ToSchema)] @@ -249,6 +255,8 @@ pub(crate) struct Details { pub tokens: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub best_of_sequences: Option>, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub top_tokens: Vec>, } #[derive(Serialize, ToSchema)] @@ -272,6 +280,8 @@ pub(crate) struct StreamDetails { #[derive(Serialize, ToSchema)] pub(crate) struct StreamResponse { pub token: Token, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub top_tokens: Vec, #[schema(nullable = true, default = "null", example = "test")] pub generated_text: Option, #[schema(nullable = true, default = "null")] diff --git a/router/src/main.rs b/router/src/main.rs index 484643cb252..d8994bac71e 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -27,6 +27,8 @@ struct Args { max_concurrent_requests: usize, #[clap(default_value = "2", long, env)] max_best_of: usize, + #[clap(default_value = "10", long, env)] + max_top_tokens: u32, #[clap(default_value = "4", long, env)] max_stop_sequences: usize, #[clap(default_value = "1024", long, env)] @@ -74,6 +76,7 @@ fn main() -> Result<(), RouterError> { let Args { max_concurrent_requests, max_best_of, + max_top_tokens, max_stop_sequences, max_input_length, max_total_tokens, @@ -258,6 +261,7 @@ fn main() -> Result<(), RouterError> { compat_return_full_text, max_concurrent_requests, max_best_of, + max_top_tokens, max_stop_sequences, max_input_length, max_total_tokens, diff --git a/router/src/queue.rs b/router/src/queue.rs index 2d8d6d1c1c2..aab3530da47 100644 --- a/router/src/queue.rs +++ b/router/src/queue.rs @@ -235,6 +235,7 @@ impl State { truncate: entry.request.truncate, parameters: Some(entry.request.parameters.clone()), stopping_parameters: Some(entry.request.stopping_parameters.clone()), + top_tokens: entry.request.top_tokens, }); // Set batch_time entry.batch_time = Some(Instant::now()); @@ -323,6 +324,7 @@ mod tests { repetition_penalty: 0.0, watermark: false, }, + top_tokens: 0, stopping_parameters: StoppingCriteriaParameters { ignore_eos_token: false, max_new_tokens: 1, diff --git a/router/src/server.rs b/router/src/server.rs index 9af94951b2a..b3ba94edf73 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -193,6 +193,7 @@ async fn generate( generated_tokens: response.generated_text.generated_tokens, prefill: response.prefill, tokens: response.tokens, + top_tokens: response.top_tokens, seed: response.generated_text.seed, } }) @@ -206,6 +207,7 @@ async fn generate( tokens: response.tokens, seed: response.generated_text.seed, best_of_sequences, + top_tokens: response.top_tokens, }) } false => None, @@ -387,12 +389,16 @@ async fn generate_stream( // Prefill is ignored InferStreamResponse::Prefill(_) => {} // Yield event for every new token - InferStreamResponse::Token(token) => { + InferStreamResponse::Intermediate{ + token, + top_tokens, + } => { tracing::debug!(parent: &span, "Token: {:?}", token); // StreamResponse let stream_token = StreamResponse { token, + top_tokens: top_tokens, generated_text: None, details: None, }; @@ -402,6 +408,7 @@ async fn generate_stream( // Yield event for last token and compute timings InferStreamResponse::End { token, + top_tokens, generated_text, start, queued, @@ -453,6 +460,7 @@ async fn generate_stream( let stream_token = StreamResponse { token, + top_tokens, generated_text: Some(output_text), details }; @@ 
-510,6 +518,7 @@ pub async fn run( compat_return_full_text: bool, max_concurrent_requests: usize, max_best_of: usize, + max_top_tokens: u32, max_stop_sequences: usize, max_input_length: usize, max_total_tokens: usize, @@ -572,6 +581,7 @@ pub async fn run( validation_workers, tokenizer, max_best_of, + max_top_tokens, max_stop_sequences, max_input_length, max_total_tokens, diff --git a/router/src/validation.rs b/router/src/validation.rs index b784dcfde41..ca345c0bbec 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -14,6 +14,7 @@ use tracing::{instrument, Span}; pub struct Validation { /// Validation parameters max_best_of: usize, + max_top_tokens: u32, max_stop_sequences: usize, max_input_length: usize, max_total_tokens: usize, @@ -26,6 +27,7 @@ impl Validation { workers: usize, tokenizer: Option, max_best_of: usize, + max_top_tokens: u32, max_stop_sequences: usize, max_input_length: usize, max_total_tokens: usize, @@ -53,6 +55,7 @@ impl Validation { Self { max_best_of, sender, + max_top_tokens, max_stop_sequences, max_input_length, max_total_tokens, @@ -130,6 +133,7 @@ impl Validation { ) -> Result { let GenerateParameters { best_of, + top_tokens, temperature, repetition_penalty, top_k, @@ -218,6 +222,15 @@ impl Validation { } }; + let top_tokens = top_tokens + .map(|value| { + if value > self.max_top_tokens { + return Err(ValidationError::TopTokens(self.max_top_tokens, value)); + } + Ok(value) + }) + .unwrap_or(Ok(0))?; + // Check if inputs is empty if request.inputs.is_empty() { return Err(EmptyInput); @@ -263,6 +276,7 @@ impl Validation { truncate: truncate.unwrap_or(self.max_input_length) as u32, parameters, stopping_parameters, + top_tokens: top_tokens, }) } @@ -336,6 +350,7 @@ pub(crate) struct ValidGenerateRequest { pub decoder_input_details: bool, pub parameters: NextTokenChooserParameters, pub stopping_parameters: StoppingCriteriaParameters, + pub top_tokens: u32, } #[derive(Error, Debug)] @@ -344,6 +359,10 @@ pub enum ValidationError { BestOf(usize, usize), #[error("`best_of` != 1 is not allowed for this endpoint")] BestOfDisabled, + #[error("`top_tokens` must be >= 0 and <= {0}. 
Given: {1}")] + TopTokens(u32, u32), + #[error("`top_tokens` != 0 is not allowed for this endpoint")] + TopTokensDisabled, #[error("you must use sampling when `best_of` is > 1")] BestOfSampling, #[error("`seed` must not be set when `best_of` > 1")] @@ -390,14 +409,16 @@ mod tests { async fn test_validation_max_new_tokens() { let tokenizer = None; let max_best_of = 2; - let max_stop_sequence = 3; - let max_input_length = 4; - let max_total_tokens = 5; + let max_top_tokens = 3; + let max_stop_sequence = 4; + let max_input_length = 5; + let max_total_tokens = 6; let workers = 1; let validation = Validation::new( workers, tokenizer, max_best_of, + max_top_tokens, max_stop_sequence, max_input_length, max_total_tokens, @@ -417,9 +438,10 @@ mod tests { async fn test_validation_input_length() { let tokenizer = Some(get_tokenizer().await); let max_best_of = 2; - let max_stop_sequence = 3; - let max_input_length = 4; - let max_total_tokens = 5; + let max_tokens = 3; + let max_stop_sequence = 4; + let max_input_length = 5; + let max_total_tokens = 6; let workers = 1; let validation = Validation::new( workers, @@ -435,7 +457,7 @@ mod tests { .validate_input("Hello".to_string(), None, max_new_tokens) .await { - Err(ValidationError::MaxTotalTokens(5, 1, 10)) => (), + Err(ValidationError::MaxTotalTokens(6, 1, 10)) => (), _ => panic!("Unexpected not max new tokens"), } } diff --git a/server/pyproject.toml b/server/pyproject.toml index 3ee3351c6e0..28d1ea3bc12 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -15,21 +15,22 @@ grpcio-status = "^1.51.1" grpcio-reflection = "^1.51.1" grpc-interceptor = "^0.15.0" typer = "^0.6.1" -accelerate = { version = "^0.19.0", optional = true } -bitsandbytes = { version = "^0.38.1", optional = true } -safetensors = "0.3.1" +accelerate = { version = "^0.20.0", optional = true } +bitsandbytes = { version = "^0.41.1", optional = true } +safetensors = "^0.4.0" loguru = "^0.6.0" opentelemetry-api = "^1.15.0" opentelemetry-exporter-otlp = "^1.15.0" opentelemetry-instrumentation-grpc = "^0.36b0" hf-transfer = "^0.1.2" sentencepiece = "^0.1.97" -tokenizers = "0.13.3" -huggingface-hub = "^0.14.1" -transformers = "4.29.2" +tokenizers = "^0.13.3" +huggingface-hub = "^0.16.4" +transformers = "^4.32.2" einops = "^0.6.1" texttable = { version = "^1.6.7", optional = true } datasets = { version = "^2.14.0", optional = true } +scipy = "^1.11.1" [tool.poetry.extras] accelerate = ["accelerate"] diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index e74c03311c6..06e0862c6a7 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -161,20 +161,24 @@ def download_weights( for p in local_pt_files ] try: - from transformers import AutoConfig import transformers + import json + from huggingface_hub import hf_hub_download - config = AutoConfig.from_pretrained( - model_id, - revision=revision, - ) - architecture = config.architectures[0] + logger.info(f"is_local_model: {is_local_model}") + if is_local_model: + config_filename = os.path.join(model_id, "config.json") + else: + config_filename = hf_hub_download(model_id, revision=revision, filename="config.json") + + with open(config_filename, "r") as f: + config = json.load(f) + architecture = config["architectures"][0] class_ = getattr(transformers, architecture) # Name for this varible depends on transformers version. 
discard_names = getattr(class_, "_tied_weights_keys", []) - discard_names.extend(getattr(class_, "_keys_to_ignore_on_load_missing", [])) except Exception as e: discard_names = [] diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py index cbdf480837c..7e3ea652418 100644 --- a/server/text_generation_server/models/causal_lm.py +++ b/server/text_generation_server/models/causal_lm.py @@ -12,9 +12,11 @@ PrefillTokens, Generation, GeneratedText, + TopTokens, ) from text_generation_server.pb import generate_pb2 from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling +from text_generation_server.utils.tokens import batch_top_tokens tracer = trace.get_tracer(__name__) @@ -42,6 +44,8 @@ class CausalLMBatch(Batch): # Generation helpers next_token_choosers: List[NextTokenChooser] stopping_criterias: List[StoppingCriteria] + top_tokens: List[int] + top_tokens_tensor: torch.Tensor # Metadata used for padding max_input_length: int @@ -72,6 +76,7 @@ def from_pb( inputs = [] next_token_choosers = [] stopping_criterias = [] + top_tokens = [] prefix_offsets = [] read_offsets = [] requests_idx_mapping = {} @@ -88,6 +93,7 @@ def from_pb( r.stopping_parameters, tokenizer ) stopping_criterias.append(stopping_criteria) + top_tokens.append(r.top_tokens) max_truncation = max(max_truncation, r.truncate) max_decode_tokens += stopping_criteria.max_new_tokens padding_right_offset = max( @@ -123,6 +129,9 @@ def from_pb( all_input_ids = tokenized_inputs["input_ids"].T.split(1, dim=1) max_tokens = len(inputs) * (max_input_length + max_decode_tokens) + top_tokens_tensor = torch.tensor( + top_tokens, device=device, dtype=torch.int64 + ) return cls( batch_id=pb.id, @@ -138,6 +147,8 @@ def from_pb( read_offsets=read_offsets, next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, + top_tokens=top_tokens, + top_tokens_tensor=top_tokens_tensor, max_input_length=max_input_length.item(), padding_right_offset=padding_right_offset, max_tokens=max_tokens, @@ -163,6 +174,7 @@ def filter(self, request_ids: List[int]) -> Optional["CausalLMBatch"]: next_token_choosers = [] stopping_criterias = [] + top_tokens = [] total_remaining_decode_tokens = 0 new_padding_right_offset = 0 @@ -184,6 +196,8 @@ def filter(self, request_ids: List[int]) -> Optional["CausalLMBatch"]: next_token_choosers.append(self.next_token_choosers[idx]) stopping_criteria = self.stopping_criterias[idx] stopping_criterias.append(stopping_criteria) + top_tokens.append(self.top_tokens[idx]) + remaining_decode_tokens = ( stopping_criteria.max_new_tokens - stopping_criteria.current_tokens ) @@ -223,6 +237,7 @@ def filter(self, request_ids: List[int]) -> Optional["CausalLMBatch"]: layer[1] = past_values[keep_indices, :, -past_kv_length:, :] del past_values + top_tokens_tensor = self.top_tokens_tensor[keep_indices] max_tokens = len(request_ids) * max_input_length + total_remaining_decode_tokens self.requests = requests @@ -235,6 +250,8 @@ def filter(self, request_ids: List[int]) -> Optional["CausalLMBatch"]: self.read_offsets = read_offsets self.next_token_choosers = next_token_choosers self.stopping_criterias = stopping_criterias + self.top_tokens = top_tokens + self.top_tokens_tensor = top_tokens_tensor self.max_input_length = max_input_length self.padding_right_offset = new_padding_right_offset self.max_tokens = max_tokens @@ -262,6 +279,7 @@ def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch": all_input_ids = [] next_token_choosers = [] 
stopping_criterias = [] + top_tokens = [] max_tokens = 0 # Batch tensors @@ -281,6 +299,7 @@ def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch": all_input_ids.extend(batch.all_input_ids) next_token_choosers.extend(batch.next_token_choosers) stopping_criterias.extend(batch.stopping_criterias) + top_tokens.extend(batch.top_tokens) if i == 0: requests_idx_mapping = batch.requests_idx_mapping @@ -310,6 +329,12 @@ def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch": (total_batch_size, max_input_length + padding_right_offset), ) + if top_tokens_tensor is None: + top_tokens_tensor = batches[0].top_tokens_tensor.new_zeros( + total_batch_size, + ) + top_tokens_tensor[start_index:end_index] = batch.top_tokens_tensor + # We need to slice the attention mask to remove padding from previous steps # and to remove unused allocated space left_offset = max_input_length - batch.max_input_length @@ -438,6 +463,8 @@ def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch": read_offsets=read_offsets, next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, + top_tokens=top_tokens, + top_tokens_tensor=top_tokens_tensor, max_input_length=max_input_length, padding_right_offset=padding_right_offset, keys_head_dim_last=batches[0].keys_head_dim_last, @@ -545,6 +572,12 @@ def generate_token( batch.past_key_values, ) + batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens( + batch.top_tokens, + batch.top_tokens_tensor, + torch.softmax(logits[:, -1], -1), + ) + # Results generations: List[Generation] = [] stopped = True @@ -559,6 +592,9 @@ def generate_token( batch.next_token_choosers, batch.stopping_criterias, batch.all_input_ids, + batch.top_tokens, + batch_top_token_ids, + batch_top_token_logprobs, ) # For each member of the batch @@ -571,6 +607,9 @@ def generate_token( next_token_chooser, stopping_criteria, all_input_ids, + top_tokens, + top_token_ids, + top_token_logprobs, ) in enumerate(iterator): # Select next token next_token_id, logprobs = next_token_chooser( @@ -637,6 +676,24 @@ def generate_token( else: prefill_tokens = None + if top_tokens > 0: + toptoken_texts = self.tokenizer.batch_decode( + top_token_ids, + clean_up_tokenization_spaces=False, + skip_special_tokens=False, + ) + special_toptokens = [ + token_id in self.all_special_ids for token_id in top_token_ids + ] + top_tokens_obj = TopTokens( + top_token_ids, + top_token_logprobs, + toptoken_texts, + special_toptokens, + ) + else: + top_tokens_obj = None + generation = Generation( request.id, prefill_tokens, @@ -645,6 +702,7 @@ def generate_token( next_token_text, next_token_id_squeezed.item() in self.all_special_ids, generated_text, + top_tokens_obj, ) generations.append(generation) diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 7de51358c8d..c8fe1061460 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -16,10 +16,12 @@ PrefillTokens, Generation, GeneratedText, + TopTokens ) from text_generation_server.pb import generate_pb2 from text_generation_server.utils import StoppingCriteria, HeterogeneousNextTokenChooser from text_generation_server.utils.dist import MEMORY_FRACTION +from text_generation_server.utils.tokens import batch_top_tokens tracer = trace.get_tracer(__name__) @@ -165,6 +167,8 @@ class FlashCausalLMBatch(Batch): # Generation helpers next_token_chooser: HeterogeneousNextTokenChooser 
stopping_criterias: List[StoppingCriteria] + top_tokens: List[int] + top_tokens_tensor: torch.Tensor # Number of blocks in this batch blocks: int @@ -217,6 +221,7 @@ def from_pb( next_token_chooser_parameters = [] stopping_criterias = [] + top_tokens = [] # Cumulative length cumulative_length = 0 @@ -259,6 +264,7 @@ def from_pb( ) max_new_tokens = stopping_criteria.max_new_tokens stopping_criterias.append(stopping_criteria) + top_tokens.append(r.top_tokens) # Paged attention # Remove one as the first token des not have a past @@ -353,6 +359,10 @@ def from_pb( prefill_next_token_indices, dtype=torch.int64, device=device ) + top_tokens_tensor = torch.tensor( + top_tokens, device=device, dtype=torch.int64 + ) + return cls( batch_id=pb.id, requests=pb.requests, @@ -378,6 +388,8 @@ def from_pb( all_input_ids_tensor=all_input_ids_tensor, next_token_chooser=next_token_chooser, stopping_criterias=stopping_criterias, + top_tokens=top_tokens, + top_tokens_tensor=top_tokens_tensor, blocks=blocks, max_blocks=max_blocks, ) @@ -417,6 +429,7 @@ def filter(self, request_ids: List[int]) -> "FlashCausalLMBatch": read_offsets = [] stopping_criterias = [] + top_tokens = [] blocks = 0 max_blocks = 0 @@ -443,6 +456,8 @@ def filter(self, request_ids: List[int]) -> "FlashCausalLMBatch": stopping_criteria = self.stopping_criterias[idx] stopping_criterias.append(stopping_criteria) + top_tokens.append(self.top_tokens[idx]) + remaining_tokens = ( stopping_criteria.max_new_tokens - stopping_criteria.current_tokens ) @@ -487,6 +502,7 @@ def filter(self, request_ids: List[int]) -> "FlashCausalLMBatch": input_lengths_tensor = self.input_lengths_tensor[indices] slots = self.slots[slot_filtering_indices] next_token_chooser = self.next_token_chooser.filter(indices) + top_tokens_tensor = self.top_tokens_tensor[indices] start_slots = torch.tensor(start_slots, dtype=torch.int64) @@ -518,6 +534,8 @@ def filter(self, request_ids: List[int]) -> "FlashCausalLMBatch": all_input_ids_tensor=all_input_ids_tensor, next_token_chooser=next_token_chooser, stopping_criterias=stopping_criterias, + top_tokens=top_tokens, + top_tokens_tensor=top_tokens_tensor, blocks=blocks, max_blocks=max_blocks, ) @@ -567,6 +585,10 @@ def concatenate(cls, batches: List["FlashCausalLMBatch"]) -> "FlashCausalLMBatch (total_batch_size, max_length) ) + top_tokens_tensor = batches[0].top_tokens_tensor.new_zeros( + total_batch_size, + ) + start_slots = [] block_tables = [] all_input_ids = [] @@ -577,6 +599,7 @@ def concatenate(cls, batches: List["FlashCausalLMBatch"]) -> "FlashCausalLMBatch next_token_chooser_parameters = [] stopping_criterias = [] + top_tokens = [] # Cumulative length cumulative_batch_size = 0 @@ -601,6 +624,7 @@ def concatenate(cls, batches: List["FlashCausalLMBatch"]) -> "FlashCausalLMBatch input_ids[start_index:end_index] = batch.input_ids position_ids[start_index:end_index] = batch.position_ids slot_indices[start_index:end_index] = batch.slot_indices + cumulative_slots + top_tokens_tensor[start_index:end_index] = batch.top_tokens_tensor input_lengths_tensor[start_index:end_index] = batch.input_lengths_tensor slots[slots_start_index:slots_end_index] = batch.slots @@ -623,6 +647,7 @@ def concatenate(cls, batches: List["FlashCausalLMBatch"]) -> "FlashCausalLMBatch next_token_chooser_parameters.extend([r.parameters for r in batch.requests]) stopping_criterias.extend(batch.stopping_criterias) + top_tokens.extend(batch.top_tokens) # Update cumulative_batch_size += len(batch) @@ -666,6 +691,8 @@ def concatenate(cls, batches: 
List["FlashCausalLMBatch"]) -> "FlashCausalLMBatch all_input_ids_tensor=all_input_ids_tensor, next_token_chooser=next_token_chooser, stopping_criterias=stopping_criterias, + top_tokens=top_tokens, + top_tokens_tensor=top_tokens_tensor, blocks=blocks, max_blocks=max_blocks, ) @@ -831,10 +858,14 @@ def generate_token( else: next_token_logits = out - next_input_ids, next_token_logprobs = batch.next_token_chooser( + next_input_ids, next_token_logprobs, logprobs = batch.next_token_chooser( batch.all_input_ids_tensor[:, : batch.max_seqlen], next_token_logits ) + batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens( + batch.top_tokens, batch.top_tokens_tensor, logprobs + ) + if prefill: if len(batch) > 1 and prefill_logprobs: # We create the prefill_tokens_indices tensor that will be used to gather prefill logprobs @@ -931,8 +962,11 @@ def generate_token( batch.all_input_ids, batch.next_token_chooser.do_sample, batch.next_token_chooser.seeds, + batch.top_tokens, next_token_ids, next_token_logprobs, + batch_top_token_ids, + batch_top_token_logprobs, ) # For each member of the batch @@ -945,8 +979,11 @@ def generate_token( all_input_ids, do_sample, seed, + top_tokens, next_token_id, next_token_logprob, + top_token_ids, + top_token_logprobs, ) in enumerate(iterator): # Append next token to all tokens all_input_ids.append(next_token_id) @@ -1005,6 +1042,24 @@ def generate_token( else: prefill_tokens = None + if top_tokens > 0: + toptoken_texts = self.tokenizer.batch_decode( + top_token_ids, + clean_up_tokenization_spaces=False, + skip_special_tokens=False, + ) + special_toptokens = [ + token_id in self.all_special_ids for token_id in top_token_ids + ] + top_tokens_obj = TopTokens( + top_token_ids, + top_token_logprobs, + toptoken_texts, + special_toptokens, + ) + else: + top_tokens_obj = None + generation = Generation( request.id, prefill_tokens, @@ -1013,6 +1068,7 @@ def generate_token( next_token_text, next_token_id in self.all_special_ids, generated_text, + top_tokens_obj, ) generations.append(generation) diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py index 9e5c21d1542..5422236d4ed 100644 --- a/server/text_generation_server/models/seq2seq_lm.py +++ b/server/text_generation_server/models/seq2seq_lm.py @@ -11,9 +11,11 @@ Batch, Generation, PrefillTokens, + TopTokens ) from text_generation_server.pb import generate_pb2 from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling +from text_generation_server.utils.tokens import batch_top_tokens tracer = trace.get_tracer(__name__) @@ -51,6 +53,8 @@ class Seq2SeqLMBatch(Batch): # Metadata used for padding max_input_length: int + top_tokens: List[int] + top_tokens_tensor: torch.Tensor max_decoder_input_length: int padding_right_offset: int @@ -78,6 +82,7 @@ def from_pb( inputs = [] next_token_choosers = [] stopping_criterias = [] + top_tokens = [] decoder_input_lengths = [] prefix_offsets = [] @@ -97,6 +102,7 @@ def from_pb( r.stopping_parameters, tokenizer ) stopping_criterias.append(stopping_criteria) + top_tokens.append(r.top_tokens) max_truncation = max(max_truncation, r.truncate) max_decode_tokens += stopping_criteria.max_new_tokens padding_right_offset = max( @@ -127,6 +133,9 @@ def from_pb( read_offsets.append(1) all_decoder_input_ids = decoder_input_ids.view(-1).split(1) + top_tokens_tensor = torch.tensor( + top_tokens, device=device, dtype=torch.int64 + ) max_tokens = len(inputs) * (max_input_length + max_decode_tokens) return cls( @@ 
diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py
index 9e5c21d1542..5422236d4ed 100644
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@@ -11,9 +11,11 @@
     Batch,
     Generation,
     PrefillTokens,
+    TopTokens
 )
 from text_generation_server.pb import generate_pb2
 from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling
+from text_generation_server.utils.tokens import batch_top_tokens
 
 tracer = trace.get_tracer(__name__)
 
@@ -51,6 +53,8 @@ class Seq2SeqLMBatch(Batch):
     # Metadata used for padding
     max_input_length: int
+    top_tokens: List[int]
+    top_tokens_tensor: torch.Tensor
     max_decoder_input_length: int
     padding_right_offset: int
@@ -78,6 +82,7 @@ def from_pb(
         inputs = []
         next_token_choosers = []
         stopping_criterias = []
+        top_tokens = []
 
         decoder_input_lengths = []
         prefix_offsets = []
@@ -97,6 +102,7 @@ def from_pb(
                 r.stopping_parameters, tokenizer
             )
             stopping_criterias.append(stopping_criteria)
+            top_tokens.append(r.top_tokens)
             max_truncation = max(max_truncation, r.truncate)
             max_decode_tokens += stopping_criteria.max_new_tokens
             padding_right_offset = max(
@@ -127,6 +133,9 @@ def from_pb(
             read_offsets.append(1)
         all_decoder_input_ids = decoder_input_ids.view(-1).split(1)
 
+        top_tokens_tensor = torch.tensor(
+            top_tokens, device=device, dtype=torch.int64
+        )
         max_tokens = len(inputs) * (max_input_length + max_decode_tokens)
 
         return cls(
@@ -146,6 +155,8 @@ def from_pb(
             read_offsets=read_offsets,
             next_token_choosers=next_token_choosers,
             stopping_criterias=stopping_criterias,
+            top_tokens=top_tokens,
+            top_tokens_tensor=top_tokens_tensor,
             max_input_length=max_input_length.item(),
             max_decoder_input_length=1,
             padding_right_offset=padding_right_offset,
@@ -173,6 +184,7 @@ def filter(self, request_ids: List[int]) -> Optional["Seq2SeqLMBatch"]:
         next_token_choosers = []
         stopping_criterias = []
+        top_tokens = []
 
         max_input_length = 0
         max_decoder_input_length = 0
@@ -204,6 +216,7 @@ def filter(self, request_ids: List[int]) -> Optional["Seq2SeqLMBatch"]:
             next_token_choosers.append(self.next_token_choosers[idx])
             stopping_criteria = self.stopping_criterias[idx]
             stopping_criterias.append(stopping_criteria)
+            top_tokens.append(self.top_tokens[idx])
             remaining_decode_tokens = (
                 stopping_criteria.max_new_tokens - stopping_criteria.current_tokens
             )
@@ -239,6 +252,7 @@ def filter(self, request_ids: List[int]) -> Optional["Seq2SeqLMBatch"]:
                 layer[2] = layer[2][keep_indices, :, -max_input_length:]
                 layer[3] = layer[3][keep_indices, :, -max_input_length:]
 
+        top_tokens_tensor = self.top_tokens_tensor[keep_indices]
         max_tokens = (
             len(request_ids) * (max_input_length + max_decoder_input_length)
             + remaining_decode_tokens
         )
@@ -254,6 +268,8 @@ def filter(self, request_ids: List[int]) -> Optional["Seq2SeqLMBatch"]:
         self.read_offsets = read_offsets
         self.next_token_choosers = next_token_choosers
         self.stopping_criterias = stopping_criterias
+        self.top_tokens = top_tokens
+        self.top_tokens_tensor = top_tokens_tensor
         self.max_input_length = max_input_length
         self.max_decoder_input_length = max_decoder_input_length
         self.padding_right_offset = padding_right_offset
@@ -289,6 +305,7 @@ def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
         read_offsets = []
         next_token_choosers = []
         stopping_criterias = []
+        top_tokens = []
         max_tokens = 0
 
         # Batch tensors
@@ -312,6 +329,7 @@ def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
             read_offsets.extend(batch.read_offsets)
             next_token_choosers.extend(batch.next_token_choosers)
             stopping_criterias.extend(batch.stopping_criterias)
+            top_tokens.extend(batch.top_tokens)
 
             if i == 0:
                 requests_idx_mapping = batch.requests_idx_mapping
@@ -384,6 +402,12 @@ def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
                 ),
             )
 
+            if top_tokens_tensor is None:
+                top_tokens_tensor = batches[0].top_tokens_tensor.new_zeros(
+                    total_batch_size,
+                )
+            top_tokens_tensor[start_index:end_index] = batch.top_tokens_tensor
+
             # Copy to correct indices
             encoder_last_hidden_state[
                 start_index:end_index, -batch.max_input_length :, :
             ]
@@ -488,6 +512,8 @@ def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
             read_offsets=read_offsets,
             next_token_choosers=next_token_choosers,
             stopping_criterias=stopping_criterias,
+            top_tokens=top_tokens,
+            top_tokens_tensor=top_tokens_tensor,
             max_input_length=max_input_length,
             max_decoder_input_length=max_decoder_input_length,
             padding_right_offset=padding_right_offset,
@@ -613,6 +639,12 @@ def generate_token(
             batch.past_key_values,
         )
 
+        batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
+            batch.top_tokens,
+            batch.top_tokens_tensor,
+            torch.softmax(logits[:, -1], -1),
+        )
+
         # Finished requests
         generations: List[Generation] = []
         stopped = True
@@ -628,6 +660,9 @@ def generate_token(
             batch.next_token_choosers,
             batch.stopping_criterias,
             batch.all_decoder_input_ids,
+            batch.top_tokens,
+            batch_top_token_ids,
+            batch_top_token_logprobs,
         )
 
         # For each member of the batch
@@ -641,6 +676,9 @@ def generate_token(
             next_token_chooser,
             stopping_criteria,
             all_decoder_input_ids,
+            top_tokens,
+            top_token_ids,
+            top_token_logprobs,
         ) in enumerate(iterator):
             # Select next token
             next_token_id, logprobs = next_token_chooser(
@@ -698,6 +736,24 @@ def generate_token(
             else:
                 prefill_tokens = None
 
+            if top_tokens > 0:
+                toptoken_texts = self.tokenizer.batch_decode(
+                    top_token_ids,
+                    clean_up_tokenization_spaces=False,
+                    skip_special_tokens=False,
+                )
+                special_toptokens = [
+                    token_id in self.all_special_ids for token_id in top_token_ids
+                ]
+                top_tokens_obj = TopTokens(
+                    top_token_ids,
+                    top_token_logprobs,
+                    toptoken_texts,
+                    special_toptokens,
+                )
+            else:
+                top_tokens_obj = None
+
             generation = Generation(
                 request.id,
                 prefill_tokens,
@@ -706,6 +762,7 @@ def generate_token(
                 next_token_text,
                 next_token_id_squeezed.item() in self.all_special_ids,
                 generated_text,
+                top_tokens_obj,
             )
 
             generations.append(generation)
 
diff --git a/server/text_generation_server/models/types.py b/server/text_generation_server/models/types.py
index 28ca8147eb9..98ad95c2859 100644
--- a/server/text_generation_server/models/types.py
+++ b/server/text_generation_server/models/types.py
@@ -71,6 +71,24 @@ def __len__(self):
         return len(self.token_ids)
 
 
+@dataclass
+class TopTokens:
+    token_ids: List[int]
+    logprobs: List[float]
+    texts: List[str]
+    is_special: List[bool]
+
+    def to_pb(self) -> generate_pb2.TopTokens:
+        return generate_pb2.TopTokens(
+            ids=self.token_ids,
+            logprobs=self.logprobs,
+            texts=self.texts,
+            is_special=self.is_special,
+        )
+
+    def __len__(self):
+        return len(self.token_ids)
+
 @dataclass
 class Generation:
     request_id: int
@@ -80,6 +98,8 @@ class Generation:
     token_text: str
     token_is_special: bool
    generated_text: Optional[GeneratedText]
+    # Optional for now, since it's not yet supported for every model.
+    top_tokens: Optional[TopTokens]
 
     def to_pb(self) -> generate_pb2.Generation:
         return generate_pb2.Generation(
@@ -94,4 +114,5 @@ class Generation:
             generated_text=self.generated_text.to_pb()
             if self.generated_text is not None
             else None,
+            top_tokens=self.top_tokens.to_pb() if self.top_tokens is not None else None,
         )
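For clarity, a minimal sketch of how the new `TopTokens` dataclass is meant to be constructed and serialized (illustration only, with made-up values; it assumes the matching `TopTokens` message is added to `generate.proto` elsewhere in this patch series, since `to_pb` references `generate_pb2.TopTokens`):

```python
from text_generation_server.models.types import TopTokens

# Hypothetical values for one decoding step of one request.
top = TopTokens(
    token_ids=[42, 7],
    logprobs=[-0.11, -2.35],
    texts=[" hello", " hi"],
    is_special=[False, False],
)

assert len(top) == 2
# Serializes to the generate_pb2.TopTokens message carried on each Generation.
top_pb = top.to_pb()
```
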
diff --git a/server/text_generation_server/utils/convert.py b/server/text_generation_server/utils/convert.py
index 8d414ecac91..0b62f520836 100644
--- a/server/text_generation_server/utils/convert.py
+++ b/server/text_generation_server/utils/convert.py
@@ -29,9 +29,15 @@ def _remove_duplicate_names(
         [name for name in shared if _is_complete(state_dict[name])]
     )
     if not complete_names:
-        raise RuntimeError(
-            f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue."
-        )
+        if len(shared) == 1:
+            # Force contiguous
+            name = list(shared)[0]
+            state_dict[name] = state_dict[name].clone()
+            complete_names = {name}
+        else:
+            raise RuntimeError(
+                f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue."
+            )
 
     keep_name = sorted(list(complete_names))[0]
 
diff --git a/server/text_generation_server/utils/tokens.py b/server/text_generation_server/utils/tokens.py
index b83af59150f..5b8400f2136 100644
--- a/server/text_generation_server/utils/tokens.py
+++ b/server/text_generation_server/utils/tokens.py
@@ -229,11 +229,11 @@ def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor):
             scores = warper(input_ids, scores)
 
         next_ids = self.choice(scores)
-        next_logprobs = torch.gather(
-            torch.log_softmax(scores, -1), 1, next_ids.view(-1, 1)
-        ).view(-1)
+        logprobs = torch.log_softmax(scores, -1)
+        next_logprobs = torch.gather(logprobs, 1, next_ids.view(-1, 1)).view(-1)
+        return next_ids, next_logprobs, logprobs
+
-        return next_ids, next_logprobs
 
     def filter(self, indices):
         if self.watermark_processor is not None:
@@ -339,3 +339,59 @@ def filter(self, indices):
         self.greedy_indices = new_greedy_indices
         self.sampling_mapping = new_sampling_mapping
         return self
+
+
+def batch_top_tokens(
+    top_tokens: list[int], top_tokens_tensor: torch.Tensor, logprobs: torch.Tensor
+) -> Tuple[List[List[int]], List[List[float]]]:
+    """Find the top n most likely tokens for a batch of generations.
+
+    When multiple tokens have equal probabilities and they don't all fit, the
+    remaining tokens are also returned.
+
+    Basically copied from HF's original repo to save some time
+
+    Args:
+        top_tokens: List specifying the number of top tokens to retrieve for each item in the batch.
+        top_tokens_tensor: Torch tensor equivalent of top_tokens for use in tensor operations.
+        logprobs: Torch tensor of log probabilities, shape (batch_size, vocab_size).
+
+    Returns:
+        A tuple containing two lists:
+        1. The indices of the top tokens for each logprob tensor in the batch.
+        2. The values of the top tokens for each logprob tensor in the batch.
+    """
+    max_top_n = max(top_tokens)
+    # Early exit when top_tokens is not used
+    if max_top_n == 0:
+        return [[]] * len(top_tokens), [[]] * len(top_tokens)
+
+    # Ensure top_n doesn't exceed vocab size
+    top_tokens = [min(tok, logprobs.size(-1)) for tok in top_tokens]
+
+    # From https://discuss.pytorch.org/t/how-to-efficiently-get-the-k-th-largest-values-in-parallel/160529/2
+    # Sorted topk is faster than torch.sort() since we only need a small subset
+    sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=1, sorted=True).values
+    nth_highest = torch.gather(
+        sorted_top_k, 1, (top_tokens_tensor - 1).clip(min=0).unsqueeze(1)
+    )
+    nth_highest[nth_highest == -float("inf")] = torch.finfo(logprobs.dtype).min
+
+    # Find the new "fuzzy" top n values
+    top_n_indices = (logprobs >= nth_highest).nonzero()
+    _, top_n_ishes = torch.unique_consecutive(top_n_indices[:, 0], return_counts=True)
+
+    top_k = torch.topk(logprobs, k=top_n_ishes.max(), dim=1, sorted=True)
+    top_n_ishes = top_n_ishes.tolist()
+    top_indices = top_k.indices.tolist()
+    top_values = top_k.values.tolist()
+
+    return (
+        [
+            idxs[:n] if req_n > 0 else []
+            for idxs, n, req_n in zip(top_indices, top_n_ishes, top_tokens)
+        ],
+        [
+            vals[:n] if req_n > 0 else []
+            for vals, n, req_n in zip(top_values, top_n_ishes, top_tokens)
+        ],
+    )
\ No newline at end of file
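To make the behaviour of the new `batch_top_tokens` helper concrete, here is a small usage sketch with made-up numbers (illustration only, not part of the patch; it assumes the patched `text_generation_server.utils.tokens` module is importable):

```python
import torch
from text_generation_server.utils.tokens import batch_top_tokens

# Request 0 asks for its 2 most likely tokens, request 1 asks for none.
top_tokens = [2, 0]
top_tokens_tensor = torch.tensor(top_tokens, dtype=torch.int64)

# Log-probabilities over a toy 4-token vocabulary, one row per request.
logprobs = torch.log_softmax(
    torch.tensor([[1.0, 3.0, 2.0, 0.5], [0.1, 0.2, 0.3, 0.4]]), dim=-1
)

top_ids, top_vals = batch_top_tokens(top_tokens, top_tokens_tensor, logprobs)
# top_ids  -> [[1, 2], []]   (ids of the two highest-scoring tokens for request 0)
# top_vals -> [[...], []]    (their log-probabilities; empty for request 1)
```

Because ties at the n-th value are kept, a request can receive more than its requested number of entries when several tokens share the same log-probability, which matches the "fuzzy" top-n behaviour described in the docstring.
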
+ """ + max_top_n = max(top_tokens) + # Early exit when top_tokens is not used + if max_top_n == 0: + return [[]] * len(top_tokens), [[]] * len(top_tokens) + + # Ensure top_n doesn't exceed vocab size + top_tokens = [min(tok, logprobs.size(-1)) for tok in top_tokens] + + # From https://discuss.pytorch.org/t/how-to-efficiently-get-the-k-th-largest-values-in-parallel/160529/2 + # Sorted topk is faster than torch.sort() since we only need a small subset + sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=1, sorted=True).values + nth_highest = torch.gather( + sorted_top_k, 1, (top_tokens_tensor - 1).clip(min=0).unsqueeze(1) + ) + nth_highest[nth_highest == -float("inf")] = torch.finfo(logprobs.dtype).min + + # Find the new "fuzzy" top n values + top_n_indices = (logprobs >= nth_highest).nonzero() + _, top_n_ishes = torch.unique_consecutive(top_n_indices[:, 0], return_counts=True) + + top_k = torch.topk(logprobs, k=top_n_ishes.max(), dim=1, sorted=True) + top_n_ishes = top_n_ishes.tolist() + top_indices = top_k.indices.tolist() + top_values = top_k.values.tolist() + + return ( + [ + idxs[:n] if req_n > 0 else [] + for idxs, n, req_n in zip(top_indices, top_n_ishes, top_tokens) + ], + [ + vals[:n] if req_n > 0 else [] + for vals, n, req_n in zip(top_values, top_n_ishes, top_tokens) + ], + ) \ No newline at end of file From 2ee704adb0a90bc95caaaadce66d82eb3108cbc9 Mon Sep 17 00:00:00 2001 From: Patrick Fernandes Date: Tue, 7 Nov 2023 17:09:10 +0000 Subject: [PATCH 40/40] Fix updating command in README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0d459b75f53..9ef56cb6be9 100644 --- a/README.md +++ b/README.md @@ -57,9 +57,10 @@ In general, you don't have to recreate the environment every time you want to up To just update the library, run in the base directory (in a previously created environment) ```shell -OPENSSL_DIR=.openssl \ -OPENSSL_LIB_DIR=.openssl/lib \ -OPENSSL_INCLUDE_DIR=.openssl/include \ +export DIR=`pwd` +OPENSSL_DIR=${DIR}/.openssl \ +OPENSSL_LIB_DIR=${DIR}/.openssl/lib \ +OPENSSL_INCLUDE_DIR=${DIR}/.openssl/include \ BUILD_EXTENSIONS=false \ make install ```