diff --git a/.config/1espt/PipelineAutobaseliningConfig.yml b/.config/1espt/PipelineAutobaseliningConfig.yml index a46c8ef6dc..134d2262d5 100644 --- a/.config/1espt/PipelineAutobaseliningConfig.yml +++ b/.config/1espt/PipelineAutobaseliningConfig.yml @@ -1,21 +1,3 @@ ## DO NOT MODIFY THIS FILE MANUALLY. This is part of auto-baselining from 1ES Pipeline Templates. Go to [https://aka.ms/1espt-autobaselining] for more details. pipelines: - 1110: - retail: - binary: - credscan: - lastModifiedDate: 2024-03-06 - binskim: - lastModifiedDate: 2024-03-06 - spotbugs: - lastModifiedDate: 2024-03-06 - source: - credscan: - lastModifiedDate: 2024-03-06 - eslint: - lastModifiedDate: 2024-03-06 - psscriptanalyzer: - lastModifiedDate: 2024-03-06 - armory: - lastModifiedDate: 2024-03-06 diff --git a/.config/guardian/.gdnbaselines b/.config/guardian/.gdnbaselines deleted file mode 100644 index afb198073d..0000000000 --- a/.config/guardian/.gdnbaselines +++ /dev/null @@ -1,93 +0,0 @@ -{ - "properties": { - "helpUri": "https://eng.ms/docs/microsoft-security/security/azure-security/cloudai-security-fundamentals-engineering/security-integration/guardian-wiki/microsoft-guardian/general/baselines" - }, - "version": "1.0.0", - "baselines": { - "default": { - "name": "default", - "createdDate": "2024-03-06 21:08:31Z", - "lastUpdatedDate": "2024-03-06 21:08:31Z" - } - }, - "results": { - "31128318971be3d77cbd3aaf7b6a06d65b1874334a143ee500c7fccb5aa89427": { - "signature": "31128318971be3d77cbd3aaf7b6a06d65b1874334a143ee500c7fccb5aa89427", - "alternativeSignatures": [ - "9106dc3b9a335702dc4feeeed54285f07d8a06494f38fc23167f6158793928dc" - ], - "target": "eng/common/SetupNugetSources.ps1", - "line": 38, - "memberOf": [ - "default" - ], - "tool": "psscriptanalyzer", - "ruleId": "PSAvoidUsingUsernameAndPasswordParams", - "createdDate": "2024-03-06 21:08:31Z", - "expirationDate": "2024-08-23 23:30:43Z", - "justification": "This error is baselined with an expiration date of 180 days from 
2024-03-06 23:30:43Z" - }, - "992b26983b997813a410dfc25048f3b218c6fc02fc14a5c2ad431ec8e022ac79": { - "signature": "992b26983b997813a410dfc25048f3b218c6fc02fc14a5c2ad431ec8e022ac79", - "alternativeSignatures": [ - "23e97da32b7142c282727c96d07fd5ce6aefd6ef26f02e91cb471eb7863542f8" - ], - "target": "eng/common/SetupNugetSources.ps1", - "line": 56, - "memberOf": [ - "default" - ], - "tool": "psscriptanalyzer", - "ruleId": "PSAvoidUsingUsernameAndPasswordParams", - "createdDate": "2024-03-06 21:08:31Z", - "expirationDate": "2024-08-23 23:30:43Z", - "justification": "This error is baselined with an expiration date of 180 days from 2024-03-06 23:30:43Z" - }, - "53b10a5fb6059b0b229ad32c6278123a5603386f65d9e1c5684a2333f2e1dc62": { - "signature": "53b10a5fb6059b0b229ad32c6278123a5603386f65d9e1c5684a2333f2e1dc62", - "alternativeSignatures": [ - "cd7b0b0937cfa32a98962a528bd99ede0181ae41a609df430f35fd30763166c4" - ], - "target": "eng/common/SetupNugetSources.ps1", - "line": 88, - "memberOf": [ - "default" - ], - "tool": "psscriptanalyzer", - "ruleId": "PSAvoidUsingUsernameAndPasswordParams", - "createdDate": "2024-03-06 21:08:31Z", - "expirationDate": "2024-08-23 23:30:43Z", - "justification": "This error is baselined with an expiration date of 180 days from 2024-03-06 23:30:43Z" - }, - "2c5f3fa8b37f6dfb1ec7cb1bc64d39a43a9a0184f317d7bd5811d734da9c8626": { - "signature": "2c5f3fa8b37f6dfb1ec7cb1bc64d39a43a9a0184f317d7bd5811d734da9c8626", - "alternativeSignatures": [ - "795ef944edceb1b07d6dd64cd3cc30a0d4d874a6dc6f5bc6f6834d2cdcef5e75" - ], - "target": "artifacts/pkgassets/Microsoft.ML.Mkl.Redist/runtimes/win-x86/native/MklImports.dll", - "memberOf": [ - "default" - ], - "tool": "binskim", - "ruleId": "BA2008", - "createdDate": "2024-03-06 21:13:53Z", - "expirationDate": "2024-08-23 23:30:43Z", - "justification": "This error is baselined with an expiration date of 180 days from 2024-03-06 23:30:43Z" - }, - "17d4115eadce781703d1e090f3c05e73f84fbbab513a1d4c8cd60b54dc8efe8c": { - 
"signature": "17d4115eadce781703d1e090f3c05e73f84fbbab513a1d4c8cd60b54dc8efe8c", - "alternativeSignatures": [ - "be452f644ec14427721109f8264e8074b2a0276ec71a0cd72e41ccbe33094c7f" - ], - "target": "artifacts/pkgassets/Microsoft.ML.Mkl.Redist/runtimes/win-x64/native/MklImports.dll", - "memberOf": [ - "default" - ], - "tool": "binskim", - "ruleId": "BA2008", - "createdDate": "2024-03-06 21:36:33Z", - "expirationDate": "2024-08-23 23:30:43Z", - "justification": "This error is baselined with an expiration date of 180 days from 2024-03-06 23:30:43Z" - } - } -} \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..9798d83002 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +# Configure regular dependency updates +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "nuget" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 0000000000..bf5b939eaf --- /dev/null +++ b/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,35 @@ +name: "Copilot Setup Steps" + +# Allow testing of the setup steps from your repository's "Actions" tab. +on: workflow_dispatch + +jobs: + # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. + # See https://docs.github.com/en/copilot/customizing-copilot/customizing-the-development-environment-for-copilot-coding-agent + copilot-setup-steps: + runs-on: ubuntu-latest + + permissions: + contents: read + + # You can define any steps you want, and they will run before the agent starts. + # If you do not check out your code, Copilot will do this for you. 
+ steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: recursive + + - name: Install Dependencies + run: | + sudo ./eng/common/native/install-dependencies.sh && \ + sudo apt-get install -qq -y \ + libomp-dev + + - name: Build + run: ./build.sh + + - name: Put dotnet on the path + run: echo "PATH=$PWD/.dotnet:$PATH" >> $GITHUB_ENV + + - name: Run dotnet info + run: dotnet --info diff --git a/Directory.Build.targets b/Directory.Build.targets index f310d751cd..e086787dc5 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -5,7 +5,7 @@ - + diff --git a/Directory.Packages.props b/Directory.Packages.props new file mode 100644 index 0000000000..5e403100a0 --- /dev/null +++ b/Directory.Packages.props @@ -0,0 +1,119 @@ + + + + + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/build/Codecoverage.proj b/build/Codecoverage.proj index 1ca3a078f2..ac14ce9743 100644 --- a/build/Codecoverage.proj +++ b/build/Codecoverage.proj @@ -6,8 +6,8 @@ - - + + diff --git a/build/ci/job-template.yml b/build/ci/job-template.yml index 6d0f8f3bd6..8ee9572fe9 100644 --- a/build/ci/job-template.yml +++ b/build/ci/job-template.yml @@ -66,13 +66,12 @@ jobs: steps: # Extra MacOS step required to install OS-specific dependencies - - ${{ if and(contains(parameters.pool.vmImage, 'macOS'), not(contains(parameters.name, 'cross'))) }}: - - script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=TRUE && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula + - ${{ if contains(parameters.pool.vmImage, 'macOS') }}: + - script: | + $(Build.SourcesDirectory)/eng/common/native/install-dependencies.sh osx + brew install libomp + brew link libomp --force displayName: Install MacOS build dependencies - # Extra Apple MacOS step 
required to install OS-specific dependencies - - ${{ if and(contains(parameters.pool.vmImage, 'macOS'), contains(parameters.name, 'cross')) }}: - - script: brew update && brew install -f --overwrite python@3.13 && brew install libomp && brew link libomp --force - displayName: Install MacOS ARM build dependencies - ${{ if and( eq(parameters.nightlyBuild, 'true'), eq(parameters.pool.vmImage, 'ubuntu-18.04')) }}: - bash: echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$(nightlyBuildRunPath):$LD_LIBRARY_PATH" displayName: Set LD_LIBRARY_PATH for Ubuntu and CentOS to locate Native shared library in current running path diff --git a/build/vsts-ci.yml b/build/vsts-ci.yml index a7ccf2f8cd..c36a1cc22a 100644 --- a/build/vsts-ci.yml +++ b/build/vsts-ci.yml @@ -28,7 +28,7 @@ variables: - name: LinuxImage value: 1es-ubuntu-2204 - name: WindowsImage - value: 1es-windows-2019 + value: 1es-windows-2022 - name: MacImage value: macOS-13 - ${{ if and(notin(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/main')) }}: @@ -56,6 +56,8 @@ extends: enabled: true tsa: enabled: true + settings: + networkIsolationPolicy: Permissive,CFSClean pool: name: $(DncEngInternalBuildPool) image: $(WindowsImage) @@ -142,7 +144,10 @@ extends: PathtoPublish: $(Build.SourcesDirectory)/artifacts/pkgassets ArtifactName: pkgassets steps: - - script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && rm '/usr/local/bin/2to3-3.11' && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula + - script: | + $(Build.SourcesDirectory)/eng/common/native/install-dependencies.sh osx + brew install libomp + brew link libomp --force displayName: Install build dependencies # Only build native assets to avoid conflicts. 
- script: ./build.sh -projects $(Build.SourcesDirectory)/src/Native/Native.proj -configuration $(BuildConfig) /p:TargetArchitecture=x64 /p:CopyPackageAssets=true @@ -161,12 +166,10 @@ extends: PathtoPublish: $(Build.SourcesDirectory)/artifacts/pkgassets ArtifactName: pkgassets steps: - # Work around MacOS Homebrew image/environment bug: https://github.com/actions/virtual-environments/issues/2322#issuecomment-749211076 - - script: | - rm -rf /usr/local/bin/2to3 - displayName: MacOS Homebrew bug Workaround - continueOnError: true - - script: brew update && brew install -f --overwrite python@3.13 && brew install libomp && brew link libomp --force + - script: | + $(Build.SourcesDirectory)/eng/common/native/install-dependencies.sh osx + brew install libomp + brew link libomp --force displayName: Install build dependencies # Only build native assets to avoid conflicts. - script: ./build.sh -projects $(Build.SourcesDirectory)/src/Native/Native.proj -configuration $(BuildConfig) /p:TargetArchitecture=arm64 /p:CopyPackageAssets=true diff --git a/docs/gen-ai/CausalLMPipeline.md b/docs/gen-ai/CausalLMPipeline.md index aeab451ae4..a1c61c2bcd 100644 --- a/docs/gen-ai/CausalLMPipeline.md +++ b/docs/gen-ai/CausalLMPipeline.md @@ -20,11 +20,11 @@ public abstract class CausalLMPipeline bool echo = false); // echo the input token ids in the output token ids } -public CasualLMPipeline : CausalLMPipeline +public CausalLMPipeline : CausalLMPipeline where TTokenizer : ITokenizer where TCausalLM : nn.Module { - public CausalLMPipeline Create(LLama2Tokenizer tokenizer, Phi3ForCasualLM model); + public CausalLMPipeline Create(LLama2Tokenizer tokenizer, Phi3ForCausalLM model); } ``` @@ -105,7 +105,7 @@ The extension `Generate` method provides a even-easier way to generate text with ```C# public static string Generate( - this CasualLMPipeline pipeline, + this CausalLMPipeline pipeline, string prompt, int maxLen = 128, float temperature = 0.7f, diff --git a/docs/gen-ai/Usage.md 
b/docs/gen-ai/Usage.md index 611d5fb22e..40b86a3282 100644 --- a/docs/gen-ai/Usage.md +++ b/docs/gen-ai/Usage.md @@ -7,9 +7,9 @@ This document shows how to use the causal language model API for text generation ```C# var pathToPhi3 = "path/to/phi3"; var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3); -var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3); +var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3); -CausalLMPipeline pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); +CausalLMPipeline pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); var prompt = "<|user|>Once upon a time<|end|>"; var output = pipeline.Generate( @@ -24,8 +24,8 @@ In most cases, developers would like to consume the model in a uniformed way. In ```C# var pathToPhi3 = "path/to/phi3"; var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3); -var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3); -CausalLMPipeline pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); +var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3); +CausalLMPipeline pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); var kernel = Kernel.CreateBuilder() // the type of the tokenizer and the model are explicitly specified // here for clarity, but the compiler can infer them @@ -33,7 +33,7 @@ var kernel = Kernel.CreateBuilder() // The reason why we don't want to allow developers to pass an arbitrary CausalLMPipeline is because // - the model and the tokenizer must be compatible // - the chat template must be compatible with the model. e.g. In `AddPhi3AsChatCompletionService`, the chat template is fixed to "<|user|>{prompt}<|end|>" - .AddPhi3AsChatCompletionService(pipeline) + .AddPhi3AsChatCompletionService(pipeline) .Build(); ``` @@ -42,7 +42,7 @@ Similarly, developers would also like to consume the language model like agent. 
```C# var pathToPhi3 = "path/to/phi3"; var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3); -var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3); +var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3); var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); var agent = new Phi3MiniAgent(pipeline, name: "assistant"); @@ -59,7 +59,7 @@ If the model is deployed as a service, developers can consume the model similar // server.cs var pathToPhi3 = "path/to/phi3"; var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3); -var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3); +var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3); var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel); var agent = new Phi3MiniAgent(pipeline, name: "assistant"); diff --git a/docs/project-docs/components-and-dependencies.md b/docs/project-docs/components-and-dependencies.md new file mode 100644 index 0000000000..ce6b175010 --- /dev/null +++ b/docs/project-docs/components-and-dependencies.md @@ -0,0 +1,269 @@ +ML.NET is a modular set of libraries that enables building a pipeline from data loaders, trainers/estimators (in case of training), transformers (in case of inferencing), and various data structures to facilitate the building of pipelines and representing data. The core of ML.NET – the Microsoft.ML package – has no external dependencies. It's largely managed code. + +Microsoft.ML does have a helper native math library, CPUMath, which is only used on .NET Framework. .NET 6.0 and later have a managed implementation using intrinsics/TensorPrimitives and do not require the native build of CPUMath. + +Microsoft.ML contains one other native library, LDANative, which is used by the LatentDirichletAllocationEstimator/Transform to support the LightLDA algorithm. If this component is used, it requires the LightLDA native library.
The native library is built for linux-arm, linux-arm64, linux-x64, osx-arm64 (M1), osx-x64, win-arm64, win-x64, win-x86. This library has only platform/CRT dependencies. + +Some components that represent an algorithm or binding to another framework are factored into separate packages to allow opt-in to using those and their dependencies. + +ML.NET redistributes Intel MKL as Microsoft.ML.MKL.Redist, which is a minimized MKL library linked with just the exports needed by ML.NET. This component follows the support matrix of Intel MKL and is only supported on x86 and x64 architectures: linux-x64, osx-x64 (no longer supported by Intel), win-x64, and win-x86. Similarly, some components have light-up to use an Intel OneDAL implementation which is only supported on x64. + +| NuGet Package | Entry-Point Components | Native Dependencies | Status | Notes | +|-------------------------------------|----------------------------------------------------------------|------------------------------------------------|----------|--------------------------------------------------------------------------------------------| +| `Microsoft.ML` | `MLContext`, core transforms, trainers | None | Stable | | +| `Microsoft.Extensions.ML` | `PredictionEnginePool` | None | Stable | | +| `Microsoft.ML.AutoML` | `AutoCatalog` for AutoML | *As required by other components* | Preview | Support varies based on components used | +| `Microsoft.ML.CodeGenerator` | | None | Preview | Part of AutoML | +| `Microsoft.ML.CpuMath` | | Optional native | Stable | Internal implementation; only used on .NET Framework | +| `Microsoft.ML.DataView` | `IDataView` | None | Stable | | +| `Microsoft.ML.DnnImageFeaturizer.*` | | None | Preview | Data-only | +| `Microsoft.ML.Ensemble` | | None | Preview | Supports ML.NET component catalog | +| `Microsoft.ML.EntryPoints` | | None | Preview | Supports ML.NET component catalog | +| `Microsoft.ML.Experimental` | | None | Preview | Experimental API | +| `Microsoft.ML.FairLearn` |
`FairlearnCatalog` | None | Preview | | +| `Microsoft.ML.FastTree` | `FastTreeRankingTrainer` | Optional native acceleration | Stable | Native library used on x86/x64; managed fallback | +| `Microsoft.ML.ImageAnalytics` | `MLImage` (image exchange type) | `libSkiaSharp` | Stable | Wrapper over SkiaSharp / Google Skia; supported where dependency is supported | +| `Microsoft.ML.LightGBM` | `LightGbm\*Trainer` | `LightGBM` | Stable | Wrapper over LightGBM; supported where dependency is supported | +| `Microsoft.ML.MKL.Components` | `SymbolicSgdLogisticRegressionBinaryTrainer` | Intel MKL | Stable | Only works where Intel MKL works | +| `Microsoft.ML.MKL.Redist` | Internal native Intel MKL | `libomp` | Stable | Not for direct reference; x86/x64 only | +| `Microsoft.ML.OneDal` | Internal native Intel OneDAL | Intel OneDAL | Preview | Not for direct reference; x64 only | +| `Microsoft.ML.OnnxConverter` | Adds ONNX export support | `Microsoft.ML.OnnxRuntime` | Stable | Wrapper over ONNX Runtime; supports "bring your own" runtime | +| `Microsoft.ML.OnnxTransformer` | `OnnxCatalog` | `Microsoft.ML.OnnxRuntime` | Stable | Wrapper over ONNX Runtime; supports "bring your own" runtime | +| `Microsoft.ML.Parquet` | `ParquetLoader` | None | Preview | Uses managed Parquet.Net (port of Apache Parquet) | +| `Microsoft.ML.Recommender` | `MatrixFactorizationTrainer` | LIBMF (bundled) | Stable | Includes libmf built for all runtimes supported by ML.NET | +| `Microsoft.ML.TensorFlow` | `TensorFlowModel`, `Transformer`, `Estimator` | TensorFlow via `TensorFlow.NET` | Stable | Wrapper over TensorFlow; supports "bring your own" runtime | +| `Microsoft.ML.TimeSeries` | `ForecastingCatalog` | Intel MKL, `libomp` | Stable | Only works where Intel MKL works | +| `Microsoft.ML.TorchSharp` | `QATrainer`, `TextClassificationTrainer`, `SentenceSimilarityTrainer` | libTorch via `TorchSharp` | Preview | Wrapper over libTorch; supported where TorchSharp is supported | +| `Microsoft.ML.Vision` |
`ImageClassificationTrainer` | TensorFlow | Stable | Depends on `Microsoft.ML.TensorFlow` for implementation | + + +Other packages: +| NuGet Package | Entry-Point Components | Native Dependencies | Status | Notes | +|----------------------------------|--------------------------------------------------|-----------------------------------------------|------------|---------| +| `Microsoft.Data.Analysis` | `DataFrame` | `Apache.Arrow` | Preview | | +| `Microsoft.ML.GenAI.*` | | | Preview | | +| `Microsoft.ML.Tokenizers.*` | `Tokenizer` | | Stable | | +| `Microsoft.ML.SampleUtils` | | | Preview | | + + +## Package Dependencies Diagram + +The following diagram shows the relationships between ML.NET packages and their external dependencies: + +```mermaid +%%{init: {'theme':'base', 'themeVariables': {'fontSize':'18px'}, 'flowchart': {'useMaxWidth': false, 'htmlLabels': true, 'curve': 'linear'}, 'securityLevel': 'loose'}}%% +graph TD + %% Core packages - arranged vertically at top + subgraph CorePackages["🔧 Core ML.NET Packages"] + direction TB + DataView["Microsoft.ML.DataView"] + Core["Microsoft.ML"] + Extensions["Microsoft.Extensions.ML"] + CpuMath["Microsoft.ML.CpuMath"] + end + + %% AutoML packages + subgraph AutoMLPackages["🤖 AutoML Packages"] + direction TB + AutoML["Microsoft.ML.AutoML"] + CodeGen["Microsoft.ML.CodeGenerator"] + Ensemble["Microsoft.ML.Ensemble"] + EntryPoints["Microsoft.ML.EntryPoints"] + FairLearn["Microsoft.ML.FairLearn"] + end + + %% Algorithm packages + subgraph AlgorithmPackages["⚙️ ML Algorithm Packages"] + direction TB + FastTree["Microsoft.ML.FastTree"] + LightGBM["Microsoft.ML.LightGBM"] + Recommender["Microsoft.ML.Recommender"] + TimeSeries["Microsoft.ML.TimeSeries"] + TorchSharp["Microsoft.ML.TorchSharp"] + end + + %% Image and vision packages + subgraph ImagePackages["🖼️ Image & Vision Packages"] + direction TB + ImageAnalytics["Microsoft.ML.ImageAnalytics"] + Vision["Microsoft.ML.Vision"] + 
DnnFeaturizerAlexNet["Microsoft.ML.DnnImageFeaturizer.AlexNet"] + DnnFeaturizerResNet18["Microsoft.ML.DnnImageFeaturizer.ResNet18"] + DnnFeaturizerResNet50["Microsoft.ML.DnnImageFeaturizer.ResNet50"] + DnnFeaturizerResNet101["Microsoft.ML.DnnImageFeaturizer.ResNet101"] + DnnFeaturizerModelRedist["Microsoft.ML.DnnImageFeaturizer.ModelRedist"] + end + + %% ONNX and TensorFlow packages + subgraph FrameworkPackages["🔗 Framework Integration Packages"] + direction TB + OnnxConverter["Microsoft.ML.OnnxConverter"] + OnnxTransformer["Microsoft.ML.OnnxTransformer"] + TensorFlow["Microsoft.ML.TensorFlow"] + end + + %% Intel MKL packages + subgraph IntelPackages["⚡ Intel MKL Packages"] + direction TB + MKLComponents["Microsoft.ML.MKL.Components"] + MKLRedist["Microsoft.ML.MKL.Redist"] + OneDal["Microsoft.ML.OneDal"] + end + + %% AI/GenAI packages + subgraph AIPackages["🧠 AI & GenAI Packages"] + direction TB + GenAICore["Microsoft.ML.GenAI.Core"] + GenAILLaMA["Microsoft.ML.GenAI.LLaMA"] + GenAIMistral["Microsoft.ML.GenAI.Mistral"] + GenAIPhi["Microsoft.ML.GenAI.Phi"] + AutoGenCore["AutoGen.Core"] + MSExtensionsAI["Microsoft.Extensions.AI.Abstractions"] + SemanticKernel["Microsoft.SemanticKernel.Abstractions"] + end + + %% Tokenizer packages + subgraph TokenizerPackages["📝 Tokenizer Packages"] + direction TB + Tokenizers["Microsoft.ML.Tokenizers"] + TokenizersGpt2["Microsoft.ML.Tokenizers.Data.Gpt2"] + TokenizersR50k["Microsoft.ML.Tokenizers.Data.R50kBase"] + TokenizersP50k["Microsoft.ML.Tokenizers.Data.P50kBase"] + TokenizersO200k["Microsoft.ML.Tokenizers.Data.O200kBase"] + TokenizersCl100k["Microsoft.ML.Tokenizers.Data.Cl100kBase"] + end + + %% Data packages + subgraph DataPackages["📊 Data Packages"] + direction TB + Parquet["Microsoft.ML.Parquet"] + DataAnalysis["Microsoft.Data.Analysis"] + end + + %% Other packages + subgraph OtherPackages["🔧 Other Packages"] + direction TB + Experimental["Microsoft.ML.Experimental"] + SampleUtils["Microsoft.ML.SampleUtils"] + end + + %% 
External dependencies - arranged vertically at bottom + subgraph ExternalDeps["🌐 External Dependencies"] + direction TB + SkiaSharp["SkiaSharp"] + LightGBMNative["LightGBM"] + OnnxRuntime["Microsoft.ML.OnnxRuntime"] + TensorFlowNET["TensorFlow.NET"] + TorchSharpLib["TorchSharp"] + ApacheArrow["Apache.Arrow"] + ParquetNet["Parquet.Net"] + GoogleProtobuf["Google.Protobuf"] + end + + %% Core dependencies + Core --> DataView + Core --> CpuMath + Extensions --> Core + + %% AutoML dependencies + AutoML --> Core + AutoML --> CpuMath + AutoML --> DnnFeaturizerAlexNet + AutoML --> DnnFeaturizerResNet18 + AutoML --> DnnFeaturizerResNet50 + AutoML --> DnnFeaturizerResNet101 + AutoML --> OnnxTransformer + AutoML --> TimeSeries + AutoML --> TorchSharp + AutoML --> Vision + AutoML --> ImageAnalytics + AutoML --> LightGBM + AutoML --> MKLComponents + AutoML --> Recommender + CodeGen --> AutoML + + %% Algorithm dependencies + FastTree --> Core + LightGBM --> Core + LightGBM --> FastTree + LightGBM --> LightGBMNative + Recommender --> Core + TimeSeries --> Core + TimeSeries --> MKLRedist + TorchSharp --> Core + TorchSharp --> ImageAnalytics + TorchSharp --> Tokenizers + TorchSharp --> TorchSharpLib + + %% Image and vision dependencies + ImageAnalytics --> Core + ImageAnalytics --> SkiaSharp + Vision --> Core + Vision --> TensorFlow + + %% Framework dependencies + OnnxConverter --> Core + OnnxTransformer --> Core + OnnxTransformer --> OnnxRuntime + OnnxTransformer --> GoogleProtobuf + TensorFlow --> Core + TensorFlow --> ImageAnalytics + TensorFlow --> TensorFlowNET + + %% Intel MKL dependencies + MKLComponents --> Core + MKLComponents --> MKLRedist + MKLComponents --> OneDal + + %% Other package dependencies + Ensemble --> Core + EntryPoints --> Core + Experimental --> Core + FairLearn --> Core + FairLearn --> DataAnalysis + FairLearn --> AutoML + Parquet --> Core + Parquet --> ParquetNet + DataAnalysis --> ApacheArrow + + %% GenAI dependencies + GenAICore --> TorchSharpLib + 
GenAICore --> AutoGenCore + GenAICore --> MSExtensionsAI + GenAICore --> SemanticKernel + GenAILLaMA --> GenAICore + GenAILLaMA --> TorchSharpLib + GenAIMistral --> GenAICore + GenAIPhi --> GenAICore + + %% DNN Image Featurizer dependencies + DnnFeaturizerAlexNet --> OnnxTransformer + DnnFeaturizerAlexNet --> DnnFeaturizerModelRedist + DnnFeaturizerResNet18 --> OnnxTransformer + DnnFeaturizerResNet18 --> DnnFeaturizerModelRedist + DnnFeaturizerResNet50 --> OnnxTransformer + DnnFeaturizerResNet50 --> DnnFeaturizerModelRedist + DnnFeaturizerResNet101 --> OnnxTransformer + DnnFeaturizerResNet101 --> DnnFeaturizerModelRedist + + %% Tokenizer dependencies + Tokenizers --> GoogleProtobuf + TokenizersGpt2 --> Tokenizers + TokenizersR50k --> Tokenizers + TokenizersP50k --> Tokenizers + TokenizersO200k --> Tokenizers + TokenizersCl100k --> Tokenizers + + %% Styling for readability and larger text + classDef external fill:#ffebcd,stroke:#d2691e,stroke-width:4px,font-size:18px,font-weight:bold + classDef core fill:#e6f3ff,stroke:#0066cc,stroke-width:4px,font-size:18px,font-weight:bold + classDef algorithm fill:#f0f8e6,stroke:#228b22,stroke-width:4px,font-size:18px,font-weight:bold + classDef bundled fill:#ffefd5,stroke:#ff8c00,stroke-width:4px,font-size:18px,font-weight:bold + classDef subgraphStyle fill:#f9f9f9,stroke:#333,stroke-width:3px,font-size:20px,font-weight:bold + + class SkiaSharp,LightGBMNative,OnnxRuntime,TensorFlowNET,TorchSharpLib,ApacheArrow,ParquetNet,GoogleProtobuf,AutoGenCore,MSExtensionsAI,SemanticKernel external + class DataView,Core,Extensions core + class 
AutoML,CodeGen,FastTree,LightGBM,Recommender,TimeSeries,TorchSharp,ImageAnalytics,DnnFeaturizerAlexNet,DnnFeaturizerResNet18,DnnFeaturizerResNet50,DnnFeaturizerResNet101,DnnFeaturizerModelRedist,Vision,OnnxConverter,OnnxTransformer,TensorFlow,MKLComponents,Ensemble,EntryPoints,Experimental,FairLearn,Parquet,DataAnalysis,GenAICore,GenAILLaMA,GenAIMistral,GenAIPhi,Tokenizers,TokenizersGpt2,TokenizersR50k,TokenizersP50k,TokenizersO200k,TokenizersCl100k,SampleUtils algorithm + class CpuMath,MKLRedist,OneDal bundled +``` + diff --git a/docs/release-notes/4.0.3/release-4.0.3.md b/docs/release-notes/4.0.3/release-4.0.3.md new file mode 100644 index 0000000000..fe4d0e4a12 --- /dev/null +++ b/docs/release-notes/4.0.3/release-4.0.3.md @@ -0,0 +1,21 @@ +# [ML.NET](http://dot.net/ml) 4.0.3 + +## **Bug Fixes** +- **[release/4.0] Improve unique directory generation for temp files** ([#7528](https://github.com/dotnet/machinelearning/pull/7528)) + - Compatibility note: This change resolves a performance problem where past versions of ML.NET would leave behind folders with the pattern `ml_dotnet\d+` in the temp directory, which would cause model opening performance to degrade. This fixes the problem. You may also wish to delete these empty folders once after updating. 
+ + Using PowerShell: + ```powershell + Get-ChildItem "$env:TEMP" -Directory -Filter "ml_dotnet*" | Remove-Item -Recurse -Force + ``` + + Using Bash: + ```bash + find "${TMPDIR:-${TEMP:-/tmp}}" -type d -name "ml_dotnet*" -exec rm -rf {} + + ``` + + + +## **Build / Test updates** +- **[release/4.0] Update dependencies from dotnet/arcade** ([#7470](https://github.com/dotnet/machinelearning/pull/7470)) +- **[release/4.0] Use arcade script for installing MacOS dependencies** ([#7534](https://github.com/dotnet/machinelearning/pull/7534)) diff --git a/docs/release-notes/5.0.0/release-5.0.0.md b/docs/release-notes/5.0.0/release-5.0.0.md new file mode 100644 index 0000000000..b7a4dfc94f --- /dev/null +++ b/docs/release-notes/5.0.0/release-5.0.0.md @@ -0,0 +1,106 @@ +# [ML.NET](http://dot.net/ml) 5.0.0 + +## **New Features** +- **[GenAI] Introduce CausalLMPipelineChatClient for MEAI.IChatClient** ([#7270](https://github.com/dotnet/machinelearning/pull/7270)) +- **Introducing SentencePiece Unigram Tokenizer Model** ([#7390](https://github.com/dotnet/machinelearning/pull/7390)) +- **Phi-4 Tokenizer Support** ([#7396](https://github.com/dotnet/machinelearning/pull/7396)) +- **Support O3 OpenAI model mapping** ([#7394](https://github.com/dotnet/machinelearning/pull/7394)) +- **Support ByteLevel encoding in Bpe tokenizer to support DeepSeek model** ([#7425](https://github.com/dotnet/machinelearning/pull/7425)) +- **Support Tiktoken Gpt-4.1 Model** ([#7453](https://github.com/dotnet/machinelearning/pull/7453)) +- **Support OpenAI OSS Models with Tiktoken tokenizer** ([#7494](https://github.com/dotnet/machinelearning/pull/7494)) +- **Add deterministic option for LightGBM** ([#7415](https://github.com/dotnet/machinelearning/pull/7415)) +- **Added NumberOfLeaves to FastForestRegression and FastForestOva options** ([#7499](https://github.com/dotnet/machinelearning/pull/7499)) - Thanks @JoshuaSloan!
+ +## **Enhancements** +- **Add Timeout to Regex used in the tokenizers** ([#7284](https://github.com/dotnet/machinelearning/pull/7284)) +- **Final tokenizer's cleanup** ([#7291](https://github.com/dotnet/machinelearning/pull/7291)) +- **Update System.Numerics.Tensors version** ([#7322](https://github.com/dotnet/machinelearning/pull/7322)) - Thanks @asmirnov82! +- **[GenAI] SFT Example** ([#7316](https://github.com/dotnet/machinelearning/pull/7316)) +- **Update M.E.AI version used by Microsoft.ML.GenAI.Core** ([#7329](https://github.com/dotnet/machinelearning/pull/7329)) +- **Update DependencyModel** ([#7338](https://github.com/dotnet/machinelearning/pull/7338)) +- **Some tweaks to the Microsoft.ML.Tokenizers PACKAGE.md** ([#7360](https://github.com/dotnet/machinelearning/pull/7360)) +- **Consolidate System.Numerics.Tensors dependency** ([#7356](https://github.com/dotnet/machinelearning/pull/7356)) - Thanks @asmirnov82! +- **Update Microsoft.Extensions.AI to 9.3.0-preview.1.25114.11** ([#7388](https://github.com/dotnet/machinelearning/pull/7388)) +- **Create SentencePieceTokenizer from options object** ([#7403](https://github.com/dotnet/machinelearning/pull/7403)) +- **Unigram tokenizer fixes** ([#7409](https://github.com/dotnet/machinelearning/pull/7409)) +- **Update to M.E.AI 9.3.0-preview.1.25161.3** ([#7414](https://github.com/dotnet/machinelearning/pull/7414)) +- **Reduce usage of unsafe constructs throughout codebase** ([#7426](https://github.com/dotnet/machinelearning/pull/7426)) - Thanks @GrabYourPitchforks! 
+- **Cleanup SentencePiece tokenizer** ([#7427](https://github.com/dotnet/machinelearning/pull/7427)) +- **Update to M.E.AI 9.4.0-preview.1.25207.5** ([#7439](https://github.com/dotnet/machinelearning/pull/7439)) +- **Update to M.E.AI 9.4.3-preview.1.25230.7** ([#7459](https://github.com/dotnet/machinelearning/pull/7459)) +- **Update to stable Microsoft.Extensions.AI.Abstractions** ([#7466](https://github.com/dotnet/machinelearning/pull/7466)) +- **Convert repository to NuGet Central Package Management** ([#7482](https://github.com/dotnet/machinelearning/pull/7482)) +- **Rename Casual to Causal** ([#7484](https://github.com/dotnet/machinelearning/pull/7484)) - Thanks @feiyun0112! +- **Updated Tensorflow.Net to 0.70.2 with Tensorflow 2.7.0.** ([#7472](https://github.com/dotnet/machinelearning/pull/7472)) - Thanks @Crichen! +- **Mark internal classes as internal** ([#7511](https://github.com/dotnet/machinelearning/pull/7511)) +- **Address the design review feedback** ([#7513](https://github.com/dotnet/machinelearning/pull/7513)) +- **BpeTokenizer Cleanup** ([#7514](https://github.com/dotnet/machinelearning/pull/7514)) +- **Improve native build and mark our official build as CFS Clean** ([#7516](https://github.com/dotnet/machinelearning/pull/7516)) +- **Improve unique directory generation for temp files** ([#7520](https://github.com/dotnet/machinelearning/pull/7520)) +- **Updating OnnxRuntime** ([#7469](https://github.com/dotnet/machinelearning/pull/7469)) + +## **Bug Fixes** +- **Fix broken inheritance from DataFrameColumn class** ([#7324](https://github.com/dotnet/machinelearning/pull/7324)) - Thanks @asmirnov82! +- **Moved SpecialTokens assignment after the modification to avoid "Collection Modified" error** ([#7328](https://github.com/dotnet/machinelearning/pull/7328)) - Thanks @shaltielshmid! +- **Fix DateTime export to csv with culture info** ([#7358](https://github.com/dotnet/machinelearning/pull/7358)) - Thanks @asmirnov82! 
+- **Increase cancelling waiting time for AutoMLExperiment_return_current_best_trial_when_ct_is_canceled_with_trial_completed_Async** ([#7424](https://github.com/dotnet/machinelearning/pull/7424)) +- **Fixed light gbm update** ([#7431](https://github.com/dotnet/machinelearning/pull/7431)) +- **Fix incorrect IntPtr null check in FftUtils** ([#7434](https://github.com/dotnet/machinelearning/pull/7434)) - Thanks @GrabYourPitchforks! +- **ImageClassificationTrainer PredictedLabelColumnName bug when the name is not default** ([#7458](https://github.com/dotnet/machinelearning/pull/7458)) - Thanks @feiyun0112! +- **Fix ElementwiseGreaterThanOrEqual to use >= instead of ==** ([#7475](https://github.com/dotnet/machinelearning/pull/7475)) - Thanks @vsarakhan! +- **Fix minor typo in BinFinder.cs** ([#7496](https://github.com/dotnet/machinelearning/pull/7496)) - Thanks @KM5075! +- **Fix PositiveRecall optimization in AutoMLExperiment** ([#7493](https://github.com/dotnet/machinelearning/pull/7493)) - Thanks @JoshuaSloan! 
+ +## **Build / Test updates** +- **Add the components governance file `cgmanifest.json` for tokenizer's vocab files** ([#7283](https://github.com/dotnet/machinelearning/pull/7283)) +- **Update To MacOS 13** ([#7285](https://github.com/dotnet/machinelearning/pull/7285)) +- **Updated remote executor** ([#7295](https://github.com/dotnet/machinelearning/pull/7295)) +- **Fixing native lookup** ([#7282](https://github.com/dotnet/machinelearning/pull/7282)) +- **Update dependencies from maintenance-packages to latest versions** ([#7301](https://github.com/dotnet/machinelearning/pull/7301)) +- **Maintenance package version updates.** ([#7304](https://github.com/dotnet/machinelearning/pull/7304)) +- **Fixing tokenizers version** ([#7309](https://github.com/dotnet/machinelearning/pull/7309)) +- **Update version for 5.0** ([#7311](https://github.com/dotnet/machinelearning/pull/7311)) +- **Update dynamic loading report reference** ([#7321](https://github.com/dotnet/machinelearning/pull/7321)) - Thanks @emmanuel-ferdman! +- **Net8 tests** ([#7319](https://github.com/dotnet/machinelearning/pull/7319)) +- **[main] Update dependencies from dotnet/arcade** ([#7266](https://github.com/dotnet/machinelearning/pull/7266)) +- **[main] Update dependencies from dotnet/arcade** ([#7352](https://github.com/dotnet/machinelearning/pull/7352)) +- **Update MSTest to latest** ([#7349](https://github.com/dotnet/machinelearning/pull/7349)) - Thanks @Youssef1313! 
+- **[main] Update dependencies from dotnet/arcade** ([#7368](https://github.com/dotnet/machinelearning/pull/7368)) +- **[main] Update dependencies from dotnet/arcade** ([#7374](https://github.com/dotnet/machinelearning/pull/7374)) +- **[main] Update dependencies from dotnet/arcade** ([#7376](https://github.com/dotnet/machinelearning/pull/7376)) +- **[main] Update dependencies from dotnet/arcade** ([#7382](https://github.com/dotnet/machinelearning/pull/7382)) +- **[main] Update dependencies from dotnet/arcade** ([#7387](https://github.com/dotnet/machinelearning/pull/7387)) +- **Update Helix ubuntu arm32 container** ([#7410](https://github.com/dotnet/machinelearning/pull/7410)) +- **Update dependencies from maintenance-packages** ([#7412](https://github.com/dotnet/machinelearning/pull/7412)) +- **[main] Update dependencies from dotnet/arcade** ([#7397](https://github.com/dotnet/machinelearning/pull/7397)) +- **Switch to AwesomeAssertions** ([#7421](https://github.com/dotnet/machinelearning/pull/7421)) +- **Update maintenance-dependencies** ([#7433](https://github.com/dotnet/machinelearning/pull/7433)) +- **update cmake mac** ([#7443](https://github.com/dotnet/machinelearning/pull/7443)) +- **[main] Update dependencies from dotnet/arcade** ([#7423](https://github.com/dotnet/machinelearning/pull/7423)) +- **[main] Update dependencies from dotnet/arcade** ([#7455](https://github.com/dotnet/machinelearning/pull/7455)) +- **Dependency version updates** ([#7457](https://github.com/dotnet/machinelearning/pull/7457)) +- **[main] Update dependencies from dotnet/arcade** ([#7463](https://github.com/dotnet/machinelearning/pull/7463)) +- **Create copilot-setup-steps.yml** ([#7478](https://github.com/dotnet/machinelearning/pull/7478)) +- **Add copilot-setup-steps.yml** ([#7481](https://github.com/dotnet/machinelearning/pull/7481)) +- **Enable dependabot.** ([#7486](https://github.com/dotnet/machinelearning/pull/7486)) +- **macOS x64 CI: fix dependency install and OpenMP runtime 
copy (use Homebrew libomp, adjust Helix payload)** ([#7510](https://github.com/dotnet/machinelearning/pull/7510)) - Thanks @asp2286! +- **Initialize es-metadata.yml for inventory** ([#7504](https://github.com/dotnet/machinelearning/pull/7504)) +- **Update Windows image, fix mac build** ([#7515](https://github.com/dotnet/machinelearning/pull/7515)) +- **[main] Update dependencies from dotnet/arcade** ([#7473](https://github.com/dotnet/machinelearning/pull/7473)) +- **[main] Update dependencies from dotnet/arcade** ([#7519](https://github.com/dotnet/machinelearning/pull/7519)) +- **Remove baselines** ([#7526](https://github.com/dotnet/machinelearning/pull/7526)) +- **[main] Update dependencies from dotnet/arcade** ([#7521](https://github.com/dotnet/machinelearning/pull/7521)) +- **Use arcade script for installing MacOS dependencies** ([#7533](https://github.com/dotnet/machinelearning/pull/7533)) +- **[main] Update dependencies from dotnet/arcade** ([#7532](https://github.com/dotnet/machinelearning/pull/7532)) + +## **Documentation Updates** +- **4.0 release notes** ([#7302](https://github.com/dotnet/machinelearning/pull/7302)) +- **Fix up docs for MLContext** ([#7334](https://github.com/dotnet/machinelearning/pull/7334)) - Thanks @gewarren! +- **Added in 5.0 preview 1 release notes** ([#7400](https://github.com/dotnet/machinelearning/pull/7400)) +- **[main] Added 4.0.2 servicing release notes** ([#7401](https://github.com/dotnet/machinelearning/pull/7401)) +- **Updated preview release notes.** ([#7405](https://github.com/dotnet/machinelearning/pull/7405)) +- **Update Tokenizer conceptual doc link in package docs** ([#7445](https://github.com/dotnet/machinelearning/pull/7445)) +- **Random doc updates** ([#7476](https://github.com/dotnet/machinelearning/pull/7476)) - Thanks @gewarren! 
+- **Add release notes for 4.0.3** ([#7530](https://github.com/dotnet/machinelearning/pull/7530)) +- **Update release-4.0.3.md** ([#7535](https://github.com/dotnet/machinelearning/pull/7535)) +- **Add a doc with information about components and dependencies** ([#7537](https://github.com/dotnet/machinelearning/pull/7537)) \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj b/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj index 68d7c0e8cd..e6b715f32d 100644 --- a/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj +++ b/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj @@ -21,7 +21,7 @@ - + diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/SFT_Llama_3_2_1B.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/SFT_Llama_3_2_1B.cs index 98f4ae71ef..33443496a4 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Llama/SFT_Llama_3_2_1B.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Llama/SFT_Llama_3_2_1B.cs @@ -25,7 +25,7 @@ public static async Task Train(string weightFolder, string checkPointName = "mod using var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole()); // create logger - var logger = loggerFactory.CreateLogger(); + var logger = loggerFactory.CreateLogger(); var device = "cuda"; @@ -46,10 +46,10 @@ public static async Task Train(string weightFolder, string checkPointName = "mod var input = CreateDataset(dataset, pipeline.TypedTokenizer, Llama3_1ChatTemplateBuilder.Instance); // create trainer - var sftTrainer = new CasualLMSupervisedFineTuningTrainer(pipeline, logger: logger); + var sftTrainer = new CausalLMSupervisedFineTuningTrainer(pipeline, logger: logger); // Train the model - var option = new CasualLMSupervisedFineTuningTrainer.Option + var option = new CausalLMSupervisedFineTuningTrainer.Option { BatchSize = 1, Device = device, diff --git 
a/docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs b/docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs index a10c01dfb3..e1db7c6d95 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs @@ -27,8 +27,8 @@ public static async Task RunAsync(string weightFolder) torch.set_default_dtype(defaultType); var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model"); var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath); - var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); - var pipeline = new CausalLMPipeline(tokenizer, model, device); + var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); + var pipeline = new CausalLMPipeline(tokenizer, model, device); var client = new Phi3CausalLMChatClient(pipeline); var task = """ diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Microsoft.ML.GenAI.Samples.csproj b/docs/samples/Microsoft.ML.GenAI.Samples/Microsoft.ML.GenAI.Samples.csproj index c8cee633ac..496a3bac33 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Microsoft.ML.GenAI.Samples.csproj +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Microsoft.ML.GenAI.Samples.csproj @@ -16,10 +16,11 @@ - - - - + + + + + diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs index 20f2dd4418..76b641a532 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs @@ -29,8 +29,8 @@ public static async Task RunAsync() var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct"; var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model"); var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath); - var model = Phi3ForCasualLM.FromPretrained(weightFolder, 
"config.json", layersOnTargetDevice: -1, quantizeToInt8: true); - var pipeline = new CausalLMPipeline(tokenizer, model, device); + var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); + var pipeline = new CausalLMPipeline(tokenizer, model, device); var question = @"write a C# program to calculate the factorial of a number"; // agent diff --git a/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/SemanticKernelSample.cs b/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/SemanticKernelSample.cs index 8ba882618b..6684627b58 100644 --- a/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/SemanticKernelSample.cs +++ b/docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/SemanticKernelSample.cs @@ -25,8 +25,8 @@ public static async Task RunChatCompletionSample() var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct"; var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model"); var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath); - var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); - var pipeline = new CausalLMPipeline(tokenizer, model, device); + var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); + var pipeline = new CausalLMPipeline(tokenizer, model, device); var kernel = Kernel.CreateBuilder() .AddGenAIChatCompletion(pipeline) @@ -56,8 +56,8 @@ public static async Task RunTextGenerationSample() var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct"; var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model"); var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath); - var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); - var pipeline = new CausalLMPipeline(tokenizer, model, device); + var model = 
Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true); + var pipeline = new CausalLMPipeline(tokenizer, model, device); var kernel = Kernel.CreateBuilder() .AddGenAITextGeneration(pipeline) diff --git a/docs/samples/Microsoft.ML.Samples.GPU/Microsoft.ML.Samples.GPU.csproj b/docs/samples/Microsoft.ML.Samples.GPU/Microsoft.ML.Samples.GPU.csproj index 6f33d4ea53..82fec48f2b 100644 --- a/docs/samples/Microsoft.ML.Samples.GPU/Microsoft.ML.Samples.GPU.csproj +++ b/docs/samples/Microsoft.ML.Samples.GPU/Microsoft.ML.Samples.GPU.csproj @@ -44,31 +44,36 @@ - + - + - + - + + + + + + DnnImageModels\ResNet18Onnx\ResNet18.onnx PreserveNewest - + DnnImageModels\ResNetPrepOnnx\ResNetPreprocess.onnx PreserveNewest - - - + + + diff --git a/docs/samples/Microsoft.ML.Samples.OneDal/Microsoft.ML.Samples.OneDal.csproj b/docs/samples/Microsoft.ML.Samples.OneDal/Microsoft.ML.Samples.OneDal.csproj index 36a5d25034..17cd5891c7 100755 --- a/docs/samples/Microsoft.ML.Samples.OneDal/Microsoft.ML.Samples.OneDal.csproj +++ b/docs/samples/Microsoft.ML.Samples.OneDal/Microsoft.ML.Samples.OneDal.csproj @@ -36,7 +36,7 @@ ../machinelearning/artifacts/bin/Microsoft.ML.OneDal/Debug/netstandard2.0/Microsoft.ML.OneDal.dll - + --> @@ -48,7 +48,7 @@ - + diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 7ea6aef2f0..f758f2ce56 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -974,9 +974,10 @@ - - - + + + + diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 8449322dda..f4c5993073 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -7,38 +7,38 @@ - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 
5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 - + https://github.com/dotnet/arcade - 5fb72aaffeff9c6f2ce46d3b226a84772fb72f55 + 904bfd153de2a88471c00a7cdd3450948e758db8 diff --git a/eng/Versions.props b/eng/Versions.props index 0ead156c83..eb42e44d38 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -12,40 +12,43 @@ preview 1.0.0.0 - 8.0.0 - 8.0.0 - 8.0.1 - 8.0.0 - 8.0.7 - 8.0.2 + 9.0.4 + 9.0.4 + 9.0.4 + 9.0.4 + 9.0.4 + 9.0.4 6.9.1 2.88.8 - 8.0.0 - 8.0.0 - 6.0.1 - 8.0.1 + 9.0.4 + 9.0.4 + 9.0.4 + 9.0.4 5.0.0 - 9.0.0 + 9.0.4 + 9.0.4 4.7.0 - 4.3.0 + 4.7.0 5.0.0 - 8.0.0 - 8.0.5 - 8.0.0 + 9.0.4 + 9.0.4 + 9.0.4 14.0.2 - 3.27.1 + 3.30.2 4.6.0 + 5.0.1 + 7.3.4 6.0.0 - 9.0.0 - 3.3.4 - 4.9.2 - 1.0.0-beta.24375.2 - 1.18.1 + 9.0.4 + 3.11.0 + 4.13.0 + 1.0.0-beta.25177.1 + 1.23.2 0.0.0.12 - 9.4.0-preview.1.25207.5 + 9.5.0 - 4.4.0 + 4.7.0 @@ -58,41 +61,45 @@ 2021.7.1.14939 1 1 - 3.7.0 - 3.7.0 + 3.8.3 + 3.8.3 13.0.3 2.1.3 0.11.1 1.4.2 - 0.20.1 + + 0.100.2 2 - 2.3.1 + 2.16.0 + 2.10.3 + 2.11.1 1.4.1 - 0.1.0 - 1.15.0 + 0.2.3 + 1.48.0 0.102.7 2.2.1.1 1.12.4 - 6.0.2 + 6.0.4 6.0.0 - 3.3.1 + 4.13.0 4.7.0 4.3.6 - 8.0.0 + 9.0.4 6.0.0 4.20.70 0.13.12 - 6.0.26 - 8.0.1 - 8.0.2 - 1.1.2-beta1.23431.1 + 6.0.36 + 8.0.16 + 8.1.0 + 1.1.2 9.0.0-beta.24212.4 5.0.0-preview.5.20278.1 8.0.0-beta.24525.2 - 10.0.0-beta.25225.4 - 8.0.2 + 11.0.0-beta.25524.1 + 9.0.4 0.0.6-test 0.0.13-test 0.0.6-test diff --git a/eng/common/SetupNugetSources.ps1 b/eng/common/SetupNugetSources.ps1 
index 5db4ad71ee..fc8d618014 100644 --- a/eng/common/SetupNugetSources.ps1 +++ b/eng/common/SetupNugetSources.ps1 @@ -7,11 +7,11 @@ # See example call for this script below. # # - task: PowerShell@2 -# displayName: Setup Private Feeds Credentials +# displayName: Setup internal Feeds Credentials # condition: eq(variables['Agent.OS'], 'Windows_NT') # inputs: -# filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1 -# arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token +# filePath: $(System.DefaultWorkingDirectory)/eng/common/SetupNugetSources.ps1 +# arguments: -ConfigFile $(System.DefaultWorkingDirectory)/NuGet.config -Password $Env:Token # env: # Token: $(dn-bot-dnceng-artifact-feeds-rw) # @@ -34,19 +34,28 @@ Set-StrictMode -Version 2.0 . $PSScriptRoot\tools.ps1 +# Adds or enables the package source with the given name +function AddOrEnablePackageSource($sources, $disabledPackageSources, $SourceName, $SourceEndPoint, $creds, $Username, $pwd) { + if ($disabledPackageSources -eq $null -or -not (EnableInternalPackageSource -DisabledPackageSources $disabledPackageSources -Creds $creds -PackageSourceName $SourceName)) { + AddPackageSource -Sources $sources -SourceName $SourceName -SourceEndPoint $SourceEndPoint -Creds $creds -Username $userName -pwd $Password + } +} + # Add source entry to PackageSources function AddPackageSource($sources, $SourceName, $SourceEndPoint, $creds, $Username, $pwd) { $packageSource = $sources.SelectSingleNode("add[@key='$SourceName']") if ($packageSource -eq $null) { + Write-Host "Adding package source $SourceName" + $packageSource = $doc.CreateElement("add") $packageSource.SetAttribute("key", $SourceName) $packageSource.SetAttribute("value", $SourceEndPoint) $sources.AppendChild($packageSource) | Out-Null } else { - Write-Host "Package source $SourceName already present." + Write-Host "Package source $SourceName already present and enabled." 
} AddCredential -Creds $creds -Source $SourceName -Username $Username -pwd $pwd @@ -59,6 +68,8 @@ function AddCredential($creds, $source, $username, $pwd) { return; } + Write-Host "Inserting credential for feed: " $source + # Looks for credential configuration for the given SourceName. Create it if none is found. $sourceElement = $creds.SelectSingleNode($Source) if ($sourceElement -eq $null) @@ -91,24 +102,27 @@ function AddCredential($creds, $source, $username, $pwd) { $passwordElement.SetAttribute("value", $pwd) } -function InsertMaestroPrivateFeedCredentials($Sources, $Creds, $Username, $pwd) { - $maestroPrivateSources = $Sources.SelectNodes("add[contains(@key,'darc-int')]") - - Write-Host "Inserting credentials for $($maestroPrivateSources.Count) Maestro's private feeds." - - ForEach ($PackageSource in $maestroPrivateSources) { - Write-Host "`tInserting credential for Maestro's feed:" $PackageSource.Key - AddCredential -Creds $creds -Source $PackageSource.Key -Username $Username -pwd $pwd +# Enable all darc-int package sources. +function EnableMaestroInternalPackageSources($DisabledPackageSources, $Creds) { + $maestroInternalSources = $DisabledPackageSources.SelectNodes("add[contains(@key,'darc-int')]") + ForEach ($DisabledPackageSource in $maestroInternalSources) { + EnableInternalPackageSource -DisabledPackageSources $DisabledPackageSources -Creds $Creds -PackageSourceName $DisabledPackageSource.key } } -function EnablePrivatePackageSources($DisabledPackageSources) { - $maestroPrivateSources = $DisabledPackageSources.SelectNodes("add[contains(@key,'darc-int')]") - ForEach ($DisabledPackageSource in $maestroPrivateSources) { - Write-Host "`tEnsuring private source '$($DisabledPackageSource.key)' is enabled by deleting it from disabledPackageSource" +# Enables an internal package source by name, if found. Returns true if the package source was found and enabled, false otherwise. 
+function EnableInternalPackageSource($DisabledPackageSources, $Creds, $PackageSourceName) { + $DisabledPackageSource = $DisabledPackageSources.SelectSingleNode("add[@key='$PackageSourceName']") + if ($DisabledPackageSource) { + Write-Host "Enabling internal source '$($DisabledPackageSource.key)'." + # Due to https://github.com/NuGet/Home/issues/10291, we must actually remove the disabled entries $DisabledPackageSources.RemoveChild($DisabledPackageSource) + + AddCredential -Creds $creds -Source $DisabledPackageSource.Key -Username $userName -pwd $Password + return $true } + return $false } if (!(Test-Path $ConfigFile -PathType Leaf)) { @@ -121,15 +135,17 @@ $doc = New-Object System.Xml.XmlDocument $filename = (Get-Item $ConfigFile).FullName $doc.Load($filename) -# Get reference to or create one if none exist already +# Get reference to - fail if none exist $sources = $doc.DocumentElement.SelectSingleNode("packageSources") if ($sources -eq $null) { - $sources = $doc.CreateElement("packageSources") - $doc.DocumentElement.AppendChild($sources) | Out-Null + Write-PipelineTelemetryError -Category 'Build' -Message "Eng/common/SetupNugetSources.ps1 returned a non-zero exit code. NuGet config file must contain a packageSources section: $ConfigFile" + ExitWithExitCode 1 } $creds = $null +$feedSuffix = "v3/index.json" if ($Password) { + $feedSuffix = "v2" # Looks for a node. Create it if none is found. 
$creds = $doc.DocumentElement.SelectSingleNode("packageSourceCredentials") if ($creds -eq $null) { @@ -138,33 +154,22 @@ if ($Password) { } } +$userName = "dn-bot" + # Check for disabledPackageSources; we'll enable any darc-int ones we find there $disabledSources = $doc.DocumentElement.SelectSingleNode("disabledPackageSources") if ($disabledSources -ne $null) { Write-Host "Checking for any darc-int disabled package sources in the disabledPackageSources node" - EnablePrivatePackageSources -DisabledPackageSources $disabledSources -} - -$userName = "dn-bot" - -# Insert credential nodes for Maestro's private feeds -InsertMaestroPrivateFeedCredentials -Sources $sources -Creds $creds -Username $userName -pwd $Password - -# 3.1 uses a different feed url format so it's handled differently here -$dotnet31Source = $sources.SelectSingleNode("add[@key='dotnet3.1']") -if ($dotnet31Source -ne $null) { - AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal/nuget/v2" -Creds $creds -Username $userName -pwd $Password - AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal-transport/nuget/v2" -Creds $creds -Username $userName -pwd $Password + EnableMaestroInternalPackageSources -DisabledPackageSources $disabledSources -Creds $creds } - -$dotnetVersions = @('5','6','7','8','9') +$dotnetVersions = @('5','6','7','8','9','10') foreach ($dotnetVersion in $dotnetVersions) { $feedPrefix = "dotnet" + $dotnetVersion; $dotnetSource = $sources.SelectSingleNode("add[@key='$feedPrefix']") if ($dotnetSource -ne $null) { - AddPackageSource -Sources $sources -SourceName "$feedPrefix-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal/nuget/v2" -Creds $creds -Username $userName -pwd $Password - AddPackageSource -Sources 
$sources -SourceName "$feedPrefix-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal-transport/nuget/v2" -Creds $creds -Username $userName -pwd $Password + AddOrEnablePackageSource -Sources $sources -DisabledPackageSources $disabledSources -SourceName "$feedPrefix-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal/nuget/$feedSuffix" -Creds $creds -Username $userName -pwd $Password + AddOrEnablePackageSource -Sources $sources -DisabledPackageSources $disabledSources -SourceName "$feedPrefix-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal-transport/nuget/$feedSuffix" -Creds $creds -Username $userName -pwd $Password } } diff --git a/eng/common/SetupNugetSources.sh b/eng/common/SetupNugetSources.sh index 4604b61b03..b97cc53637 100755 --- a/eng/common/SetupNugetSources.sh +++ b/eng/common/SetupNugetSources.sh @@ -11,8 +11,8 @@ # - task: Bash@3 # displayName: Setup Internal Feeds # inputs: -# filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh -# arguments: $(Build.SourcesDirectory)/NuGet.config +# filePath: $(System.DefaultWorkingDirectory)/eng/common/SetupNugetSources.sh +# arguments: $(System.DefaultWorkingDirectory)/NuGet.config # condition: ne(variables['Agent.OS'], 'Windows_NT') # - task: NuGetAuthenticate@1 # @@ -52,78 +52,124 @@ if [[ `uname -s` == "Darwin" ]]; then TB='' fi -# Ensure there is a ... section. -grep -i "" $ConfigFile -if [ "$?" != "0" ]; then - echo "Adding ... section." - ConfigNodeHeader="" - PackageSourcesTemplate="${TB}${NL}${TB}" +# Enables an internal package source by name, if found. Returns 0 if found and enabled, 1 if not found. +EnableInternalPackageSource() { + local PackageSourceName="$1" + + # Check if disabledPackageSources section exists + grep -i "" "$ConfigFile" > /dev/null + if [ "$?" 
!= "0" ]; then + return 1 # No disabled sources section + fi + + # Check if this source name is disabled + grep -i " /dev/null + if [ "$?" == "0" ]; then + echo "Enabling internal source '$PackageSourceName'." + # Remove the disabled entry (including any surrounding comments or whitespace on the same line) + sed -i.bak "//d" "$ConfigFile" + + # Add the source name to PackageSources for credential handling + PackageSources+=("$PackageSourceName") + return 0 # Found and enabled + fi + + return 1 # Not found in disabled sources +} + +# Add source entry to PackageSources +AddPackageSource() { + local SourceName="$1" + local SourceEndPoint="$2" + + # Check if source already exists + grep -i " /dev/null + if [ "$?" == "0" ]; then + echo "Package source $SourceName already present and enabled." + PackageSources+=("$SourceName") + return + fi + + echo "Adding package source $SourceName" + PackageSourcesNodeFooter="" + PackageSourceTemplate="${TB}" + + sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourceTemplate${NL}$PackageSourcesNodeFooter|" "$ConfigFile" + PackageSources+=("$SourceName") +} + +# Adds or enables the package source with the given name +AddOrEnablePackageSource() { + local SourceName="$1" + local SourceEndPoint="$2" + + # Try to enable if disabled, if not found then add new source + EnableInternalPackageSource "$SourceName" + if [ "$?" != "0" ]; then + AddPackageSource "$SourceName" "$SourceEndPoint" + fi +} - sed -i.bak "s|$ConfigNodeHeader|$ConfigNodeHeader${NL}$PackageSourcesTemplate|" $ConfigFile -fi +# Enable all darc-int package sources +EnableMaestroInternalPackageSources() { + # Check if disabledPackageSources section exists + grep -i "" "$ConfigFile" > /dev/null + if [ "$?" 
!= "0" ]; then + return # No disabled sources section + fi + + # Find all darc-int disabled sources + local DisabledDarcIntSources=() + DisabledDarcIntSources+=$(grep -oh '"darc-int-[^"]*" value="true"' "$ConfigFile" | tr -d '"') + + for DisabledSourceName in ${DisabledDarcIntSources[@]} ; do + if [[ $DisabledSourceName == darc-int* ]]; then + EnableInternalPackageSource "$DisabledSourceName" + fi + done +} -# Ensure there is a ... section. -grep -i "" $ConfigFile +# Ensure there is a ... section. +grep -i "" $ConfigFile if [ "$?" != "0" ]; then - echo "Adding ... section." - - PackageSourcesNodeFooter="" - PackageSourceCredentialsTemplate="${TB}${NL}${TB}" - - sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourcesNodeFooter${NL}$PackageSourceCredentialsTemplate|" $ConfigFile + Write-PipelineTelemetryError -Category 'Build' "Error: Eng/common/SetupNugetSources.sh returned a non-zero exit code. NuGet config file must contain a packageSources section: $ConfigFile" + ExitWithExitCode 1 fi PackageSources=() -# Ensure dotnet3.1-internal and dotnet3.1-internal-transport are in the packageSources if the public dotnet3.1 feeds are present -grep -i "... section. + grep -i "" $ConfigFile if [ "$?" != "0" ]; then - echo "Adding dotnet3.1-internal to the packageSources." - PackageSourcesNodeFooter="" - PackageSourceTemplate="${TB}" + echo "Adding ... section." - sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourceTemplate${NL}$PackageSourcesNodeFooter|" $ConfigFile - fi - PackageSources+=('dotnet3.1-internal') - - grep -i "" $ConfigFile - if [ "$?" != "0" ]; then - echo "Adding dotnet3.1-internal-transport to the packageSources." 
PackageSourcesNodeFooter="" - PackageSourceTemplate="${TB}" + PackageSourceCredentialsTemplate="${TB}${NL}${TB}" - sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourceTemplate${NL}$PackageSourcesNodeFooter|" $ConfigFile + sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourcesNodeFooter${NL}$PackageSourceCredentialsTemplate|" $ConfigFile fi - PackageSources+=('dotnet3.1-internal-transport') fi -DotNetVersions=('5' '6' '7' '8' '9') +# Check for disabledPackageSources; we'll enable any darc-int ones we find there +grep -i "" $ConfigFile > /dev/null +if [ "$?" == "0" ]; then + echo "Checking for any darc-int disabled package sources in the disabledPackageSources node" + EnableMaestroInternalPackageSources +fi + +DotNetVersions=('5' '6' '7' '8' '9' '10') for DotNetVersion in ${DotNetVersions[@]} ; do FeedPrefix="dotnet${DotNetVersion}"; - grep -i " /dev/null if [ "$?" == "0" ]; then - grep -i "" - - sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourceTemplate${NL}$PackageSourcesNodeFooter|" $ConfigFile - fi - PackageSources+=("$FeedPrefix-internal") - - grep -i "" $ConfigFile - if [ "$?" != "0" ]; then - echo "Adding $FeedPrefix-internal-transport to the packageSources." - PackageSourcesNodeFooter="" - PackageSourceTemplate="${TB}" - - sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourceTemplate${NL}$PackageSourcesNodeFooter|" $ConfigFile - fi - PackageSources+=("$FeedPrefix-internal-transport") + AddOrEnablePackageSource "$FeedPrefix-internal" "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$FeedPrefix-internal/nuget/$FeedSuffix" + AddOrEnablePackageSource "$FeedPrefix-internal-transport" "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$FeedPrefix-internal-transport/nuget/$FeedSuffix" fi done @@ -139,29 +185,12 @@ if [ "$CredToken" ]; then # Check if there is no existing credential for this FeedName grep -i "<$FeedName>" $ConfigFile if [ "$?" != "0" ]; then - echo "Adding credentials for $FeedName." 
+ echo " Inserting credential for feed: $FeedName" PackageSourceCredentialsNodeFooter="" - NewCredential="${TB}${TB}<$FeedName>${NL}${NL}${NL}" + NewCredential="${TB}${TB}<$FeedName>${NL}${TB}${NL}${TB}${TB}${NL}${TB}${TB}" sed -i.bak "s|$PackageSourceCredentialsNodeFooter|$NewCredential${NL}$PackageSourceCredentialsNodeFooter|" $ConfigFile fi done fi - -# Re-enable any entries in disabledPackageSources where the feed name contains darc-int -grep -i "" $ConfigFile -if [ "$?" == "0" ]; then - DisabledDarcIntSources=() - echo "Re-enabling any disabled \"darc-int\" package sources in $ConfigFile" - DisabledDarcIntSources+=$(grep -oh '"darc-int-[^"]*" value="true"' $ConfigFile | tr -d '"') - for DisabledSourceName in ${DisabledDarcIntSources[@]} ; do - if [[ $DisabledSourceName == darc-int* ]] - then - OldDisableValue="" - NewDisableValue="" - sed -i.bak "s|$OldDisableValue|$NewDisableValue|" $ConfigFile - echo "Neutralized disablePackageSources entry for '$DisabledSourceName'" - fi - done -fi diff --git a/eng/common/build.ps1 b/eng/common/build.ps1 index 6b3be1916f..8cfee107e7 100644 --- a/eng/common/build.ps1 +++ b/eng/common/build.ps1 @@ -21,6 +21,7 @@ Param( [switch] $publish, [switch] $clean, [switch][Alias('pb')]$productBuild, + [switch]$fromVMR, [switch][Alias('bl')]$binaryLog, [switch][Alias('nobl')]$excludeCIBinarylog, [switch] $ci, @@ -74,6 +75,7 @@ function Print-Usage() { Write-Host " -nativeToolsOnMachine Sets the native tools on machine environment variable (indicating that the script should use native tools on machine)" Write-Host " -nodeReuse Sets nodereuse msbuild parameter ('true' or 'false')" Write-Host " -buildCheck Sets /check msbuild parameter" + Write-Host " -fromVMR Set when building from within the VMR" Write-Host "" Write-Host "Command line arguments not listed above are passed thru to msbuild." 
@@ -127,7 +129,8 @@ function Build { /p:Deploy=$deploy ` /p:Test=$test ` /p:Pack=$pack ` - /p:DotNetBuildRepo=$productBuild ` + /p:DotNetBuild=$productBuild ` + /p:DotNetBuildFromVMR=$fromVMR ` /p:IntegrationTest=$integrationTest ` /p:PerformanceTest=$performanceTest ` /p:Sign=$sign ` diff --git a/eng/common/build.sh b/eng/common/build.sh index 36fba82a37..ec3e80d189 100755 --- a/eng/common/build.sh +++ b/eng/common/build.sh @@ -43,6 +43,7 @@ usage() echo " --nodeReuse Sets nodereuse msbuild parameter ('true' or 'false')" echo " --warnAsError Sets warnaserror msbuild parameter ('true' or 'false')" echo " --buildCheck Sets /check msbuild parameter" + echo " --fromVMR Set when building from within the VMR" echo "" echo "Command line arguments not listed above are passed thru to msbuild." echo "Arguments can also be passed in with a single hyphen." @@ -64,6 +65,7 @@ restore=false build=false source_build=false product_build=false +from_vmr=false rebuild=false test=false integration_test=false @@ -89,8 +91,8 @@ verbosity='minimal' runtime_source_feed='' runtime_source_feed_key='' -properties='' -while [[ $# > 0 ]]; do +properties=() +while [[ $# -gt 0 ]]; do opt="$(echo "${1/#--/-}" | tr "[:upper:]" "[:lower:]")" case "$opt" in -help|-h) @@ -129,19 +131,22 @@ while [[ $# > 0 ]]; do -pack) pack=true ;; - -sourcebuild|-sb) + -sourcebuild|-source-build|-sb) build=true source_build=true product_build=true restore=true pack=true ;; - -productBuild|-pb) + -productbuild|-product-build|-pb) build=true product_build=true restore=true pack=true ;; + -fromvmr|-from-vmr) + from_vmr=true + ;; -test|-t) test=true ;; @@ -187,7 +192,7 @@ while [[ $# > 0 ]]; do shift ;; *) - properties="$properties $1" + properties+=("$1") ;; esac @@ -221,7 +226,7 @@ function Build { InitializeCustomToolset if [[ ! 
-z "$projects" ]]; then - properties="$properties /p:Projects=$projects" + properties+=("/p:Projects=$projects") fi local bl="" @@ -241,8 +246,9 @@ function Build { /p:RepoRoot="$repo_root" \ /p:Restore=$restore \ /p:Build=$build \ - /p:DotNetBuildRepo=$product_build \ + /p:DotNetBuild=$product_build \ /p:DotNetBuildSourceOnly=$source_build \ + /p:DotNetBuildFromVMR=$from_vmr \ /p:Rebuild=$rebuild \ /p:Test=$test \ /p:Pack=$pack \ @@ -251,7 +257,7 @@ function Build { /p:Sign=$sign \ /p:Publish=$publish \ /p:RestoreStaticGraphEnableBinaryLogger=$binary_log \ - $properties + ${properties[@]+"${properties[@]}"} ExitWithExitCode 0 } diff --git a/eng/common/core-templates/job/job.yml b/eng/common/core-templates/job/job.yml index 6badecba7b..cb4ccc023a 100644 --- a/eng/common/core-templates/job/job.yml +++ b/eng/common/core-templates/job/job.yml @@ -19,7 +19,10 @@ parameters: # publishing defaults artifacts: '' enableMicrobuild: false + enablePreviewMicrobuild: false + microbuildPluginVersion: 'latest' enableMicrobuildForMacAndLinux: false + microbuildUseESRP: true enablePublishBuildArtifacts: false enablePublishBuildAssets: false enablePublishTestResults: false @@ -127,7 +130,10 @@ jobs: - template: /eng/common/core-templates/steps/install-microbuild.yml parameters: enableMicrobuild: ${{ parameters.enableMicrobuild }} + enablePreviewMicrobuild: ${{ parameters.enablePreviewMicrobuild }} + microbuildPluginVersion: ${{ parameters.microbuildPluginVersion }} enableMicrobuildForMacAndLinux: ${{ parameters.enableMicrobuildForMacAndLinux }} + microbuildUseESRP: ${{ parameters.microbuildUseESRP }} continueOnError: ${{ parameters.continueOnError }} - ${{ if and(eq(parameters.runAsPublic, 'false'), eq(variables['System.TeamProject'], 'internal')) }}: @@ -151,6 +157,8 @@ jobs: - template: /eng/common/core-templates/steps/cleanup-microbuild.yml parameters: enableMicrobuild: ${{ parameters.enableMicrobuild }} + enablePreviewMicrobuild: ${{ parameters.enablePreviewMicrobuild }} + 
microbuildPluginVersion: ${{ parameters.microbuildPluginVersion }} enableMicrobuildForMacAndLinux: ${{ parameters.enableMicrobuildForMacAndLinux }} continueOnError: ${{ parameters.continueOnError }} @@ -161,7 +169,7 @@ jobs: inputs: testResultsFormat: 'xUnit' testResultsFiles: '*.xml' - searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults/$(_BuildConfig)' + searchFolder: '$(System.DefaultWorkingDirectory)/artifacts/TestResults/$(_BuildConfig)' testRunTitle: ${{ coalesce(parameters.testRunTitle, parameters.name, '$(System.JobName)') }}-xunit mergeTestResults: ${{ parameters.mergeTestResults }} continueOnError: true @@ -172,7 +180,7 @@ jobs: inputs: testResultsFormat: 'VSTest' testResultsFiles: '*.trx' - searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults/$(_BuildConfig)' + searchFolder: '$(System.DefaultWorkingDirectory)/artifacts/TestResults/$(_BuildConfig)' testRunTitle: ${{ coalesce(parameters.testRunTitle, parameters.name, '$(System.JobName)') }}-trx mergeTestResults: ${{ parameters.mergeTestResults }} continueOnError: true @@ -216,7 +224,7 @@ jobs: - task: CopyFiles@2 displayName: Gather buildconfiguration for build retry inputs: - SourceFolder: '$(Build.SourcesDirectory)/eng/common/BuildConfiguration' + SourceFolder: '$(System.DefaultWorkingDirectory)/eng/common/BuildConfiguration' Contents: '**' TargetFolder: '$(Build.ArtifactStagingDirectory)/eng/common/BuildConfiguration' continueOnError: true diff --git a/eng/common/core-templates/job/onelocbuild.yml b/eng/common/core-templates/job/onelocbuild.yml index 00feec8ebb..c5788829a8 100644 --- a/eng/common/core-templates/job/onelocbuild.yml +++ b/eng/common/core-templates/job/onelocbuild.yml @@ -4,11 +4,11 @@ parameters: # Optional: A defined YAML pool - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#pool pool: '' - + CeapexPat: $(dn-bot-ceapex-package-r) # PAT for the loc AzDO instance https://dev.azure.com/ceapex GithubPat: 
$(BotAccount-dotnet-bot-repo-PAT) - SourcesDirectory: $(Build.SourcesDirectory) + SourcesDirectory: $(System.DefaultWorkingDirectory) CreatePr: true AutoCompletePr: false ReusePr: true @@ -27,7 +27,7 @@ parameters: is1ESPipeline: '' jobs: - job: OneLocBuild${{ parameters.JobNameSuffix }} - + dependsOn: ${{ parameters.dependsOn }} displayName: OneLocBuild${{ parameters.JobNameSuffix }} @@ -68,7 +68,7 @@ jobs: - ${{ if ne(parameters.SkipLocProjectJsonGeneration, 'true') }}: - task: Powershell@2 inputs: - filePath: $(Build.SourcesDirectory)/eng/common/generate-locproject.ps1 + filePath: $(System.DefaultWorkingDirectory)/eng/common/generate-locproject.ps1 arguments: $(_GenerateLocProjectArguments) displayName: Generate LocProject.json condition: ${{ parameters.condition }} @@ -86,8 +86,7 @@ jobs: isAutoCompletePrSelected: ${{ parameters.AutoCompletePr }} ${{ if eq(parameters.CreatePr, true) }}: isUseLfLineEndingsSelected: ${{ parameters.UseLfLineEndings }} - ${{ if eq(parameters.RepoType, 'gitHub') }}: - isShouldReusePrSelected: ${{ parameters.ReusePr }} + isShouldReusePrSelected: ${{ parameters.ReusePr }} packageSourceAuth: patAuth patVariable: ${{ parameters.CeapexPat }} ${{ if eq(parameters.RepoType, 'gitHub') }}: @@ -100,22 +99,20 @@ jobs: mirrorBranch: ${{ parameters.MirrorBranch }} condition: ${{ parameters.condition }} - - template: /eng/common/core-templates/steps/publish-build-artifacts.yml - parameters: - is1ESPipeline: ${{ parameters.is1ESPipeline }} - args: - displayName: Publish Localization Files - pathToPublish: '$(Build.ArtifactStagingDirectory)/loc' - publishLocation: Container - artifactName: Loc - condition: ${{ parameters.condition }} + # Copy the locProject.json to the root of the Loc directory, then publish a pipeline artifact + - task: CopyFiles@2 + displayName: Copy LocProject.json + inputs: + SourceFolder: '$(System.DefaultWorkingDirectory)/eng/Localize/' + Contents: 'LocProject.json' + TargetFolder: '$(Build.ArtifactStagingDirectory)/loc' + 
condition: ${{ parameters.condition }} - - template: /eng/common/core-templates/steps/publish-build-artifacts.yml + - template: /eng/common/core-templates/steps/publish-pipeline-artifacts.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} args: - displayName: Publish LocProject.json - pathToPublish: '$(Build.SourcesDirectory)/eng/Localize/' - publishLocation: Container - artifactName: Loc - condition: ${{ parameters.condition }} \ No newline at end of file + targetPath: '$(Build.ArtifactStagingDirectory)/loc' + artifactName: 'Loc' + displayName: 'Publish Localization Files' + condition: ${{ parameters.condition }} diff --git a/eng/common/core-templates/job/publish-build-assets.yml b/eng/common/core-templates/job/publish-build-assets.yml index 4f1dc42e02..721a556669 100644 --- a/eng/common/core-templates/job/publish-build-assets.yml +++ b/eng/common/core-templates/job/publish-build-assets.yml @@ -32,6 +32,16 @@ parameters: # Optional: 🌤️ or not the build has assets it wants to publish to BAR isAssetlessBuild: false + # Optional, publishing version + publishingVersion: 3 + + # Optional: A minimatch pattern for the asset manifests to publish to BAR + assetManifestsPattern: '*/manifests/**/*.xml' + + repositoryAlias: self + + officialBuildId: '' + jobs: - job: Asset_Registry_Publish @@ -54,6 +64,11 @@ jobs: value: false # unconditional - needed for logs publishing (redactor tool version) - template: /eng/common/core-templates/post-build/common-variables.yml + - name: OfficialBuildId + ${{ if ne(parameters.officialBuildId, '') }}: + value: ${{ parameters.officialBuildId }} + ${{ else }}: + value: $(Build.BuildNumber) pool: # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com) @@ -72,36 +87,64 @@ jobs: - 'Illegal entry point, is1ESPipeline is not defined. 
Repository yaml should not directly reference templates in core-templates folder.': error - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - checkout: self + - checkout: ${{ parameters.repositoryAlias }} fetchDepth: 3 clean: true - - ${{ if eq(parameters.isAssetlessBuild, 'false') }}: - - task: DownloadPipelineArtifact@2 - displayName: Download Asset Manifests - inputs: - artifactName: AssetManifests - targetPath: '$(Build.StagingDirectory)/AssetManifests' - condition: ${{ parameters.condition }} - continueOnError: ${{ parameters.continueOnError }} - + - ${{ if eq(parameters.isAssetlessBuild, 'false') }}: + - ${{ if eq(parameters.publishingVersion, 3) }}: + - task: DownloadPipelineArtifact@2 + displayName: Download Asset Manifests + inputs: + artifactName: AssetManifests + targetPath: '$(Build.StagingDirectory)/AssetManifests' + condition: ${{ parameters.condition }} + continueOnError: ${{ parameters.continueOnError }} + - ${{ if eq(parameters.publishingVersion, 4) }}: + - task: DownloadPipelineArtifact@2 + displayName: Download V4 asset manifests + inputs: + itemPattern: '*/manifests/**/*.xml' + targetPath: '$(Build.StagingDirectory)/AllAssetManifests' + condition: ${{ parameters.condition }} + continueOnError: ${{ parameters.continueOnError }} + - task: CopyFiles@2 + displayName: Copy V4 asset manifests to AssetManifests + inputs: + SourceFolder: '$(Build.StagingDirectory)/AllAssetManifests' + Contents: ${{ parameters.assetManifestsPattern }} + TargetFolder: '$(Build.StagingDirectory)/AssetManifests' + flattenFolders: true + condition: ${{ parameters.condition }} + continueOnError: ${{ parameters.continueOnError }} + - task: NuGetAuthenticate@1 + # Populate internal runtime variables. 
+ - template: /eng/common/templates/steps/enable-internal-sources.yml + parameters: + legacyCredential: $(dn-bot-dnceng-artifact-feeds-rw) + + - template: /eng/common/templates/steps/enable-internal-runtimes.yml + - task: AzureCLI@2 displayName: Publish Build Assets inputs: azureSubscription: "Darc: Maestro Production" scriptType: ps scriptLocation: scriptPath - scriptPath: $(Build.SourcesDirectory)/eng/common/sdk-task.ps1 + scriptPath: $(System.DefaultWorkingDirectory)/eng/common/sdk-task.ps1 arguments: -task PublishBuildAssets -restore -msbuildEngine dotnet /p:ManifestsPath='$(Build.StagingDirectory)/AssetManifests' /p:IsAssetlessBuild=${{ parameters.isAssetlessBuild }} /p:MaestroApiEndpoint=https://maestro.dot.net - /p:OfficialBuildId=$(Build.BuildNumber) + /p:OfficialBuildId=$(OfficialBuildId) + -runtimeSourceFeed https://ci.dot.net/internal + -runtimeSourceFeedKey $(dotnetbuilds-internal-container-read-token-base64) + condition: ${{ parameters.condition }} continueOnError: ${{ parameters.continueOnError }} - + - task: powershell@2 displayName: Create ReleaseConfigs Artifact inputs: @@ -113,13 +156,24 @@ jobs: Add-Content -Path $filePath -Value "$(DefaultChannels)" Add-Content -Path $filePath -Value $(IsStableBuild) - $symbolExclusionfile = "$(Build.SourcesDirectory)/eng/SymbolPublishingExclusionsFile.txt" + $symbolExclusionfile = "$(System.DefaultWorkingDirectory)/eng/SymbolPublishingExclusionsFile.txt" if (Test-Path -Path $symbolExclusionfile) { Write-Host "SymbolExclusionFile exists" Copy-Item -Path $symbolExclusionfile -Destination "$(Build.StagingDirectory)/ReleaseConfigs" } + - ${{ if eq(parameters.publishingVersion, 4) }}: + - template: /eng/common/core-templates/steps/publish-pipeline-artifacts.yml + parameters: + is1ESPipeline: ${{ parameters.is1ESPipeline }} + args: + targetPath: '$(Build.ArtifactStagingDirectory)/MergedManifest.xml' + artifactName: AssetManifests + displayName: 'Publish Merged Manifest' + retryCountOnTaskFailure: 10 # for any logs 
being locked + sbomEnabled: false # we don't need SBOM for logs + - template: /eng/common/core-templates/steps/publish-build-artifacts.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} @@ -142,7 +196,7 @@ jobs: azureSubscription: "Darc: Maestro Production" scriptType: ps scriptLocation: scriptPath - scriptPath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1 + scriptPath: $(System.DefaultWorkingDirectory)/eng/common/post-build/publish-using-darc.ps1 arguments: > -BuildId $(BARBuildId) -PublishingInfraVersion 3 @@ -151,9 +205,11 @@ jobs: -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}' -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}' -SkipAssetsPublishing '${{ parameters.isAssetlessBuild }}' + -runtimeSourceFeed https://ci.dot.net/internal + -runtimeSourceFeedKey $(dotnetbuilds-internal-container-read-token-base64) - ${{ if eq(parameters.enablePublishBuildArtifacts, 'true') }}: - template: /eng/common/core-templates/steps/publish-logs.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} - JobLabel: 'Publish_Artifacts_Logs' + JobLabel: 'Publish_Artifacts_Logs' diff --git a/eng/common/core-templates/jobs/codeql-build.yml b/eng/common/core-templates/jobs/codeql-build.yml index 693b00b370..dbc14ac580 100644 --- a/eng/common/core-templates/jobs/codeql-build.yml +++ b/eng/common/core-templates/jobs/codeql-build.yml @@ -24,7 +24,7 @@ jobs: - name: DefaultGuardianVersion value: 0.109.0 - name: GuardianPackagesConfigFile - value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config + value: $(System.DefaultWorkingDirectory)\eng\common\sdl\packages.config - name: GuardianVersion value: ${{ coalesce(parameters.overrideGuardianVersion, '$(DefaultGuardianVersion)') }} diff --git a/eng/common/core-templates/jobs/jobs.yml b/eng/common/core-templates/jobs/jobs.yml index bf35b78faa..01ada74766 100644 --- 
a/eng/common/core-templates/jobs/jobs.yml +++ b/eng/common/core-templates/jobs/jobs.yml @@ -43,6 +43,8 @@ parameters: artifacts: {} is1ESPipeline: '' + repositoryAlias: self + officialBuildId: '' # Internal resources (telemetry, microbuild) can only be accessed from non-public projects, # and some (Microbuild) should only be applied to non-PR cases for internal builds. @@ -83,7 +85,6 @@ jobs: - template: /eng/common/core-templates/jobs/source-build.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} - allCompletedJobId: Source_Build_Complete ${{ each parameter in parameters.sourceBuildParameters }}: ${{ parameter.key }}: ${{ parameter.value }} @@ -108,8 +109,6 @@ jobs: - ${{ if eq(parameters.publishBuildAssetsDependsOn, '') }}: - ${{ each job in parameters.jobs }}: - ${{ job.job }} - - ${{ if eq(parameters.enableSourceBuild, true) }}: - - Source_Build_Complete runAsPublic: ${{ parameters.runAsPublic }} publishAssetsImmediately: ${{ or(parameters.publishAssetsImmediately, parameters.isAssetlessBuild) }} @@ -117,3 +116,5 @@ jobs: enablePublishBuildArtifacts: ${{ parameters.enablePublishBuildArtifacts }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} signingValidationAdditionalParameters: ${{ parameters.signingValidationAdditionalParameters }} + repositoryAlias: ${{ parameters.repositoryAlias }} + officialBuildId: ${{ parameters.officialBuildId }} diff --git a/eng/common/core-templates/jobs/source-build.yml b/eng/common/core-templates/jobs/source-build.yml index a10ccfbee6..d92860cba2 100644 --- a/eng/common/core-templates/jobs/source-build.yml +++ b/eng/common/core-templates/jobs/source-build.yml @@ -2,19 +2,13 @@ parameters: # This template adds arcade-powered source-build to CI. A job is created for each platform, as # well as an optional server job that completes when all platform jobs complete. - # The name of the "join" job for all source-build platforms. 
If set to empty string, the job is - # not included. Existing repo pipelines can use this job depend on all source-build jobs - # completing without maintaining a separate list of every single job ID: just depend on this one - # server job. By default, not included. Recommended name if used: 'Source_Build_Complete'. - allCompletedJobId: '' - # See /eng/common/core-templates/job/source-build.yml jobNamePrefix: 'Source_Build' # This is the default platform provided by Arcade, intended for use by a managed-only repo. defaultManagedPlatform: name: 'Managed' - container: 'mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream9' + container: 'mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream-10-amd64' # Defines the platforms on which to run build jobs. One job is created for each platform, and the # object in this array is sent to the job template as 'platform'. If no platforms are specified, @@ -31,16 +25,6 @@ parameters: jobs: -- ${{ if ne(parameters.allCompletedJobId, '') }}: - - job: ${{ parameters.allCompletedJobId }} - displayName: Source-Build Complete - pool: server - dependsOn: - - ${{ each platform in parameters.platforms }}: - - ${{ parameters.jobNamePrefix }}_${{ platform.name }} - - ${{ if eq(length(parameters.platforms), 0) }}: - - ${{ parameters.jobNamePrefix }}_${{ parameters.defaultManagedPlatform.name }} - - ${{ each platform in parameters.platforms }}: - template: /eng/common/core-templates/job/source-build.yml parameters: diff --git a/eng/common/core-templates/post-build/post-build.yml b/eng/common/core-templates/post-build/post-build.yml index 5757915edb..0cea81c74f 100644 --- a/eng/common/core-templates/post-build/post-build.yml +++ b/eng/common/core-templates/post-build/post-build.yml @@ -1,106 +1,106 @@ parameters: - # Which publishing infra should be used. THIS SHOULD MATCH THE VERSION ON THE BUILD MANIFEST. 
- # Publishing V1 is no longer supported - # Publishing V2 is no longer supported - # Publishing V3 is the default - - name: publishingInfraVersion - displayName: Which version of publishing should be used to promote the build definition? - type: number - default: 3 - values: - - 3 - - - name: BARBuildId - displayName: BAR Build Id - type: number - default: 0 - - - name: PromoteToChannelIds - displayName: Channel to promote BARBuildId to - type: string - default: '' - - - name: enableSourceLinkValidation - displayName: Enable SourceLink validation - type: boolean - default: false - - - name: enableSigningValidation - displayName: Enable signing validation - type: boolean - default: true - - - name: enableSymbolValidation - displayName: Enable symbol validation - type: boolean - default: false - - - name: enableNugetValidation - displayName: Enable NuGet validation - type: boolean - default: true - - - name: publishInstallersAndChecksums - displayName: Publish installers and checksums - type: boolean - default: true - - - name: requireDefaultChannels - displayName: Fail the build if there are no default channel(s) registrations for the current build - type: boolean - default: false - - - name: SDLValidationParameters - type: object - default: - enable: false - publishGdn: false - continueOnError: false - params: '' - artifactNames: '' - downloadArtifacts: true - - - name: isAssetlessBuild - type: boolean - displayName: Is Assetless Build - default: false - - # These parameters let the user customize the call to sdk-task.ps1 for publishing - # symbols & general artifacts as well as for signing validation - - name: symbolPublishingAdditionalParameters - displayName: Symbol publishing additional parameters - type: string - default: '' - - - name: artifactsPublishingAdditionalParameters - displayName: Artifact publishing additional parameters - type: string - default: '' - - - name: signingValidationAdditionalParameters - displayName: Signing validation additional 
parameters - type: string - default: '' - - # Which stages should finish execution before post-build stages start - - name: validateDependsOn - type: object - default: - - build - - - name: publishDependsOn - type: object - default: - - Validate - - # Optional: Call asset publishing rather than running in a separate stage - - name: publishAssetsImmediately - type: boolean - default: false - - - name: is1ESPipeline - type: boolean - default: false +# Which publishing infra should be used. THIS SHOULD MATCH THE VERSION ON THE BUILD MANIFEST. +# Publishing V1 is no longer supported +# Publishing V2 is no longer supported +# Publishing V3 is the default +- name: publishingInfraVersion + displayName: Which version of publishing should be used to promote the build definition? + type: number + default: 3 + values: + - 3 + +- name: BARBuildId + displayName: BAR Build Id + type: number + default: 0 + +- name: PromoteToChannelIds + displayName: Channel to promote BARBuildId to + type: string + default: '' + +- name: enableSourceLinkValidation + displayName: Enable SourceLink validation + type: boolean + default: false + +- name: enableSigningValidation + displayName: Enable signing validation + type: boolean + default: true + +- name: enableSymbolValidation + displayName: Enable symbol validation + type: boolean + default: false + +- name: enableNugetValidation + displayName: Enable NuGet validation + type: boolean + default: true + +- name: publishInstallersAndChecksums + displayName: Publish installers and checksums + type: boolean + default: true + +- name: requireDefaultChannels + displayName: Fail the build if there are no default channel(s) registrations for the current build + type: boolean + default: false + +- name: SDLValidationParameters + type: object + default: + enable: false + publishGdn: false + continueOnError: false + params: '' + artifactNames: '' + downloadArtifacts: true + +- name: isAssetlessBuild + type: boolean + displayName: Is Assetless Build + 
default: false + +# These parameters let the user customize the call to sdk-task.ps1 for publishing +# symbols & general artifacts as well as for signing validation +- name: symbolPublishingAdditionalParameters + displayName: Symbol publishing additional parameters + type: string + default: '' + +- name: artifactsPublishingAdditionalParameters + displayName: Artifact publishing additional parameters + type: string + default: '' + +- name: signingValidationAdditionalParameters + displayName: Signing validation additional parameters + type: string + default: '' + +# Which stages should finish execution before post-build stages start +- name: validateDependsOn + type: object + default: + - build + +- name: publishDependsOn + type: object + default: + - Validate + +# Optional: Call asset publishing rather than running in a separate stage +- name: publishAssetsImmediately + type: boolean + default: false + +- name: is1ESPipeline + type: boolean + default: false stages: - ${{ if or(eq( parameters.enableNugetValidation, 'true'), eq(parameters.enableSigningValidation, 'true'), eq(parameters.enableSourceLinkValidation, 'true'), eq(parameters.SDLValidationParameters.enable, 'true')) }}: @@ -108,10 +108,10 @@ stages: dependsOn: ${{ parameters.validateDependsOn }} displayName: Validate Build Assets variables: - - template: /eng/common/core-templates/post-build/common-variables.yml - - template: /eng/common/core-templates/variables/pool-providers.yml - parameters: - is1ESPipeline: ${{ parameters.is1ESPipeline }} + - template: /eng/common/core-templates/post-build/common-variables.yml + - template: /eng/common/core-templates/variables/pool-providers.yml + parameters: + is1ESPipeline: ${{ parameters.is1ESPipeline }} jobs: - job: displayName: NuGet Validation @@ -134,28 +134,28 @@ stages: demands: ImageOverride -equals windows.vs2022.amd64 steps: - - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml - parameters: - BARBuildId: ${{ parameters.BARBuildId }} - 
PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} - is1ESPipeline: ${{ parameters.is1ESPipeline }} - - - task: DownloadBuildArtifacts@0 - displayName: Download Package Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: PackageArtifacts - checkDownloadedFiles: true - - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/nuget-validation.ps1 - arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ + - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} + is1ESPipeline: ${{ parameters.is1ESPipeline }} + + - task: DownloadBuildArtifacts@0 + displayName: Download Package Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: PackageArtifacts + checkDownloadedFiles: true + + - task: PowerShell@2 + displayName: Validate + inputs: + filePath: $(System.DefaultWorkingDirectory)/eng/common/post-build/nuget-validation.ps1 + arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ - job: displayName: Signing Validation @@ -169,57 +169,54 @@ stages: os: windows # If it's not devdiv, it's dnceng ${{ else }}: - ${{ if eq(parameters.is1ESPipeline, true) }}: + ${{ if eq(parameters.is1ESPipeline, true) }}: name: $(DncEngInternalBuildPool) image: 1es-windows-2022 os: windows ${{ else }}: name: $(DncEngInternalBuildPool) - demands: ImageOverride -equals windows.vs2022.amd64 + demands: ImageOverride -equals windows.vs2022.amd64 steps: - - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml - parameters: - BARBuildId: ${{ parameters.BARBuildId }} - PromoteToChannelIds: ${{ 
parameters.PromoteToChannelIds }} - is1ESPipeline: ${{ parameters.is1ESPipeline }} - - - task: DownloadBuildArtifacts@0 - displayName: Download Package Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: PackageArtifacts - checkDownloadedFiles: true - itemPattern: | - ** - !**/Microsoft.SourceBuild.Intermediate.*.nupkg - - # This is necessary whenever we want to publish/restore to an AzDO private feed - # Since sdk-task.ps1 tries to restore packages we need to do this authentication here - # otherwise it'll complain about accessing a private feed. - - task: NuGetAuthenticate@1 - displayName: 'Authenticate to AzDO Feeds' - - # Signing validation will optionally work with the buildmanifest file which is downloaded from - # Azure DevOps above. - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: eng\common\sdk-task.ps1 - arguments: -task SigningValidation -restore -msbuildEngine vs - /p:PackageBasePath='$(Build.ArtifactStagingDirectory)/PackageArtifacts' - /p:SignCheckExclusionsFile='$(Build.SourcesDirectory)/eng/SignCheckExclusionsFile.txt' - ${{ parameters.signingValidationAdditionalParameters }} - - - template: /eng/common/core-templates/steps/publish-logs.yml - parameters: - is1ESPipeline: ${{ parameters.is1ESPipeline }} - StageLabel: 'Validation' - JobLabel: 'Signing' - BinlogToolVersion: $(BinlogToolVersion) + - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} + is1ESPipeline: ${{ parameters.is1ESPipeline }} + + - task: DownloadBuildArtifacts@0 + displayName: Download Package Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: PackageArtifacts + 
checkDownloadedFiles: true + + # This is necessary whenever we want to publish/restore to an AzDO private feed + # Since sdk-task.ps1 tries to restore packages we need to do this authentication here + # otherwise it'll complain about accessing a private feed. + - task: NuGetAuthenticate@1 + displayName: 'Authenticate to AzDO Feeds' + + # Signing validation will optionally work with the buildmanifest file which is downloaded from + # Azure DevOps above. + - task: PowerShell@2 + displayName: Validate + inputs: + filePath: eng\common\sdk-task.ps1 + arguments: -task SigningValidation -restore -msbuildEngine vs + /p:PackageBasePath='$(Build.ArtifactStagingDirectory)/PackageArtifacts' + /p:SignCheckExclusionsFile='$(System.DefaultWorkingDirectory)/eng/SignCheckExclusionsFile.txt' + ${{ parameters.signingValidationAdditionalParameters }} + + - template: /eng/common/core-templates/steps/publish-logs.yml + parameters: + is1ESPipeline: ${{ parameters.is1ESPipeline }} + StageLabel: 'Validation' + JobLabel: 'Signing' + BinlogToolVersion: $(BinlogToolVersion) - job: displayName: SourceLink Validation @@ -233,41 +230,41 @@ stages: os: windows # If it's not devdiv, it's dnceng ${{ else }}: - ${{ if eq(parameters.is1ESPipeline, true) }}: + ${{ if eq(parameters.is1ESPipeline, true) }}: name: $(DncEngInternalBuildPool) image: 1es-windows-2022 os: windows ${{ else }}: name: $(DncEngInternalBuildPool) - demands: ImageOverride -equals windows.vs2022.amd64 + demands: ImageOverride -equals windows.vs2022.amd64 steps: - - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml - parameters: - BARBuildId: ${{ parameters.BARBuildId }} - PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} - is1ESPipeline: ${{ parameters.is1ESPipeline }} - - - task: DownloadBuildArtifacts@0 - displayName: Download Blob Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - 
artifactName: BlobArtifacts - checkDownloadedFiles: true - - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/sourcelink-validation.ps1 - arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ - -ExtractPath $(Agent.BuildDirectory)/Extract/ - -GHRepoName $(Build.Repository.Name) - -GHCommit $(Build.SourceVersion) - -SourcelinkCliVersion $(SourceLinkCLIVersion) - continueOnError: true + - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} + is1ESPipeline: ${{ parameters.is1ESPipeline }} + + - task: DownloadBuildArtifacts@0 + displayName: Download Blob Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: BlobArtifacts + checkDownloadedFiles: true + + - task: PowerShell@2 + displayName: Validate + inputs: + filePath: $(System.DefaultWorkingDirectory)/eng/common/post-build/sourcelink-validation.ps1 + arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ + -ExtractPath $(Agent.BuildDirectory)/Extract/ + -GHRepoName $(Build.Repository.Name) + -GHCommit $(Build.SourceVersion) + -SourcelinkCliVersion $(SourceLinkCLIVersion) + continueOnError: true - ${{ if ne(parameters.publishAssetsImmediately, 'true') }}: - stage: publish_using_darc @@ -277,10 +274,10 @@ stages: dependsOn: ${{ parameters.validateDependsOn }} displayName: Publish using Darc variables: - - template: /eng/common/core-templates/post-build/common-variables.yml - - template: /eng/common/core-templates/variables/pool-providers.yml - parameters: - is1ESPipeline: ${{ parameters.is1ESPipeline }} + - template: /eng/common/core-templates/post-build/common-variables.yml + - template: /eng/common/core-templates/variables/pool-providers.yml + parameters: + 
is1ESPipeline: ${{ parameters.is1ESPipeline }} jobs: - job: displayName: Publish Using Darc @@ -294,30 +291,36 @@ stages: os: windows # If it's not devdiv, it's dnceng ${{ else }}: - ${{ if eq(parameters.is1ESPipeline, true) }}: + ${{ if eq(parameters.is1ESPipeline, true) }}: name: NetCore1ESPool-Publishing-Internal image: windows.vs2019.amd64 os: windows ${{ else }}: name: NetCore1ESPool-Publishing-Internal - demands: ImageOverride -equals windows.vs2019.amd64 + demands: ImageOverride -equals windows.vs2019.amd64 steps: - - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml - parameters: - BARBuildId: ${{ parameters.BARBuildId }} - PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} - is1ESPipeline: ${{ parameters.is1ESPipeline }} - - - task: NuGetAuthenticate@1 - - - task: AzureCLI@2 - displayName: Publish Using Darc - inputs: - azureSubscription: "Darc: Maestro Production" - scriptType: ps - scriptLocation: scriptPath - scriptPath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1 - arguments: > + - template: /eng/common/core-templates/post-build/setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} + is1ESPipeline: ${{ parameters.is1ESPipeline }} + + - task: NuGetAuthenticate@1 # Populate internal runtime variables. 
+ + - template: /eng/common/templates/steps/enable-internal-sources.yml + parameters: + legacyCredential: $(dn-bot-dnceng-artifact-feeds-rw) + + - template: /eng/common/templates/steps/enable-internal-runtimes.yml + + - task: AzureCLI@2 + displayName: Publish Using Darc + inputs: + azureSubscription: "Darc: Maestro Production" + scriptType: ps + scriptLocation: scriptPath + scriptPath: $(System.DefaultWorkingDirectory)/eng/common/post-build/publish-using-darc.ps1 + arguments: > -BuildId $(BARBuildId) -PublishingInfraVersion ${{ parameters.publishingInfraVersion }} -AzdoToken '$(System.AccessToken)' @@ -326,3 +329,5 @@ stages: -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}' -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}' -SkipAssetsPublishing '${{ parameters.isAssetlessBuild }}' + -runtimeSourceFeed https://ci.dot.net/internal + -runtimeSourceFeedKey $(dotnetbuilds-internal-container-read-token-base64) diff --git a/eng/common/core-templates/post-build/setup-maestro-vars.yml b/eng/common/core-templates/post-build/setup-maestro-vars.yml index f7602980db..a7abd58c4b 100644 --- a/eng/common/core-templates/post-build/setup-maestro-vars.yml +++ b/eng/common/core-templates/post-build/setup-maestro-vars.yml @@ -36,7 +36,7 @@ steps: $AzureDevOpsBuildId = $Env:Build_BuildId } else { - . $(Build.SourcesDirectory)\eng\common\tools.ps1 + . 
$(System.DefaultWorkingDirectory)\eng\common\tools.ps1 $darc = Get-Darc $buildInfo = & $darc get-build ` --id ${{ parameters.BARBuildId }} ` diff --git a/eng/common/core-templates/steps/enable-internal-sources.yml b/eng/common/core-templates/steps/enable-internal-sources.yml index 64f881bffc..4085512b69 100644 --- a/eng/common/core-templates/steps/enable-internal-sources.yml +++ b/eng/common/core-templates/steps/enable-internal-sources.yml @@ -17,8 +17,8 @@ steps: - task: PowerShell@2 displayName: Setup Internal Feeds inputs: - filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1 - arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token + filePath: $(System.DefaultWorkingDirectory)/eng/common/SetupNugetSources.ps1 + arguments: -ConfigFile $(System.DefaultWorkingDirectory)/NuGet.config -Password $Env:Token env: Token: ${{ parameters.legacyCredential }} # If running on dnceng (internal project), just use the default behavior for NuGetAuthenticate. 
@@ -29,8 +29,8 @@ steps: - task: PowerShell@2 displayName: Setup Internal Feeds inputs: - filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1 - arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config + filePath: $(System.DefaultWorkingDirectory)/eng/common/SetupNugetSources.ps1 + arguments: -ConfigFile $(System.DefaultWorkingDirectory)/NuGet.config - ${{ else }}: - template: /eng/common/templates/steps/get-federated-access-token.yml parameters: @@ -39,8 +39,8 @@ steps: - task: PowerShell@2 displayName: Setup Internal Feeds inputs: - filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1 - arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $(dnceng-artifacts-feeds-read-access-token) + filePath: $(System.DefaultWorkingDirectory)/eng/common/SetupNugetSources.ps1 + arguments: -ConfigFile $(System.DefaultWorkingDirectory)/NuGet.config -Password $(dnceng-artifacts-feeds-read-access-token) # This is required in certain scenarios to install the ADO credential provider. # It installed by default in some msbuild invocations (e.g. VS msbuild), but needs to be installed for others # (e.g. dotnet msbuild). diff --git a/eng/common/core-templates/steps/generate-sbom.yml b/eng/common/core-templates/steps/generate-sbom.yml index 44a9636cdf..003f7eae0f 100644 --- a/eng/common/core-templates/steps/generate-sbom.yml +++ b/eng/common/core-templates/steps/generate-sbom.yml @@ -5,8 +5,8 @@ # IgnoreDirectories - Directories to ignore for SBOM generation. This will be passed through to the CG component detector. 
parameters: - PackageVersion: 10.0.0 - BuildDropPath: '$(Build.SourcesDirectory)/artifacts' + PackageVersion: 11.0.0 + BuildDropPath: '$(System.DefaultWorkingDirectory)/artifacts' PackageName: '.NET' ManifestDirPath: $(Build.ArtifactStagingDirectory)/sbom IgnoreDirectories: '' diff --git a/eng/common/core-templates/steps/install-microbuild-impl.yml b/eng/common/core-templates/steps/install-microbuild-impl.yml new file mode 100644 index 0000000000..9fdf3a1167 --- /dev/null +++ b/eng/common/core-templates/steps/install-microbuild-impl.yml @@ -0,0 +1,34 @@ +parameters: + - name: microbuildTaskInputs + type: object + default: {} + + - name: microbuildEnv + type: object + default: {} + + - name: enablePreviewMicrobuild + type: boolean + default: false + + - name: condition + type: string + + - name: continueOnError + type: boolean + +steps: +- ${{ if eq(parameters.enablePreviewMicrobuild, 'true') }}: + - task: MicroBuildSigningPluginPreview@4 + displayName: Install Preview MicroBuild plugin (Windows) + inputs: ${{ parameters.microbuildTaskInputs }} + env: ${{ parameters.microbuildEnv }} + continueOnError: ${{ parameters.continueOnError }} + condition: ${{ parameters.condition }} +- ${{ else }}: + - task: MicroBuildSigningPlugin@4 + displayName: Install MicroBuild plugin (Windows) + inputs: ${{ parameters.microbuildTaskInputs }} + env: ${{ parameters.microbuildEnv }} + continueOnError: ${{ parameters.continueOnError }} + condition: ${{ parameters.condition }} \ No newline at end of file diff --git a/eng/common/core-templates/steps/install-microbuild.yml b/eng/common/core-templates/steps/install-microbuild.yml index 2bcf974ee1..3d42d9a566 100644 --- a/eng/common/core-templates/steps/install-microbuild.yml +++ b/eng/common/core-templates/steps/install-microbuild.yml @@ -4,82 +4,95 @@ parameters: # Enable install tasks for MicroBuild on Mac and Linux # Will be ignored if 'enableMicrobuild' is false or 'Agent.Os' is 'Windows_NT' enableMicrobuildForMacAndLinux: false - # 
Location of the MicroBuild output folder - microBuildOutputFolder: '$(Build.SourcesDirectory)' + # Enable preview version of MB signing plugin + enablePreviewMicrobuild: false + # Determines whether the ESRP service connection information should be passed to the signing plugin. + # This overlaps with _SignType to some degree. We only need the service connection for real signing. + # It's important that the service connection not be passed to the MicroBuildSigningPlugin task in this place. + # Doing so will cause the service connection to be authorized for the pipeline, which isn't allowed and won't work for non-prod. + # Unfortunately, _SignType can't be used to exclude the use of the service connection in non-real sign scenarios. The + # variable is not available in template expression. _SignType has a very large proliferation across .NET, so replacing it is tough. + microbuildUseESRP: true + # Microbuild version + microbuildPluginVersion: 'latest' + continueOnError: false steps: - ${{ if eq(parameters.enableMicrobuild, 'true') }}: - ${{ if eq(parameters.enableMicrobuildForMacAndLinux, 'true') }}: - # Needed to download the MicroBuild plugin nupkgs on Mac and Linux when nuget.exe is unavailable + # Installing .NET 8 is required to use the MicroBuild signing plugin on non-Windows platforms - task: UseDotNet@2 displayName: Install .NET 8.0 SDK for MicroBuild Plugin inputs: packageType: sdk version: 8.0.x - installationPath: ${{ parameters.microBuildOutputFolder }}/.dotnet - workingDirectory: ${{ parameters.microBuildOutputFolder }} + # Installing the SDK in a '.dotnet-microbuild' directory is required for signing. + # See target FindDotNetPathForMicroBuild in arcade/src/Microsoft.DotNet.Arcade.Sdk/tools/Sign.proj + # Do not remove '.dotnet-microbuild' from the path without changing the corresponding logic. 
+ installationPath: $(Agent.TempDirectory)/.dotnet-microbuild condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) - - task: MicroBuildSigningPlugin@4 - displayName: Install MicroBuild plugin - inputs: - signType: $(_SignType) - zipSources: false - feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json - ${{ if and(eq(parameters.enableMicrobuildForMacAndLinux, 'true'), ne(variables['Agent.Os'], 'Windows_NT')) }}: - azureSubscription: 'MicroBuild Signing Task (DevDiv)' - useEsrpCli: true - env: - TeamName: $(_TeamName) - MicroBuildOutputFolderOverride: ${{ parameters.microBuildOutputFolder }} - SYSTEM_ACCESSTOKEN: $(System.AccessToken) - continueOnError: ${{ parameters.continueOnError }} - condition: and( - succeeded(), - or( - and( - eq(variables['Agent.Os'], 'Windows_NT'), - in(variables['_SignType'], 'real', 'test') - ), - and( - ${{ eq(parameters.enableMicrobuildForMacAndLinux, true) }}, - ne(variables['Agent.Os'], 'Windows_NT'), - eq(variables['_SignType'], 'real') - ) - )) - - # Workaround for ESRP CLI on Linux - https://github.com/dotnet/source-build/issues/4964 - - ${{ if eq(parameters.enableMicrobuildForMacAndLinux, 'true') }}: - - task: UseDotNet@2 - displayName: Install .NET 9.0 SDK for ESRP CLI Workaround - inputs: - packageType: sdk - version: 9.0.x - installationPath: ${{ parameters.microBuildOutputFolder }}/.dotnet - workingDirectory: ${{ parameters.microBuildOutputFolder }} - condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux')) - - - task: PowerShell@2 - displayName: Workaround for ESRP CLI on Linux - inputs: - targetType: 'inline' - script: | - Write-Host "Copying Linux Path" - $MBSIGN_APPFOLDER = '$(MBSIGN_APPFOLDER)' - $MBSIGN_APPFOLDER = ($MBSIGN_APPFOLDER -replace '/build', '') - - $versionRegex = '\d+\.\d+\.\d+' - $package = Get-ChildItem -Path $MBSIGN_APPFOLDER -Directory | - Where-Object { $_.Name -match $versionRegex } + - script: | + REM Check if ESRP is disabled while 
SignType is real + if /I "${{ parameters.microbuildUseESRP }}"=="false" if /I "$(_SignType)"=="real" ( + echo Error: ESRP must be enabled when SignType is real. + exit /b 1 + ) + displayName: 'Validate ESRP usage (Windows)' + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + - script: | + # Check if ESRP is disabled while SignType is real + if [ "${{ parameters.microbuildUseESRP }}" = "false" ] && [ "$(_SignType)" = "real" ]; then + echo "Error: ESRP must be enabled when SignType is real." + exit 1 + fi + displayName: 'Validate ESRP usage (Non-Windows)' + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) - if ($package.Count -ne 1) { - Write-Host "There should be exactly one matching subfolder, but found $($package.Count)." - exit 1 - } + # Two different MB install steps. This is due to not being able to use the agent OS during + # YAML expansion, and Windows vs. Linux/Mac uses different service connections. However, + # we can avoid including the MB install step if not enabled at all. This avoids a bunch of + # extra pipeline authorizations, since most pipelines do not sign on non-Windows. 
+ - template: /eng/common/core-templates/steps/install-microbuild-impl.yml@self + parameters: + enablePreviewMicrobuild: ${{ parameters.enablePreviewMicrobuild }} + microbuildTaskInputs: + signType: $(_SignType) + zipSources: false + feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json + version: ${{ parameters.microbuildPluginVersion }} + ${{ if eq(parameters.microbuildUseESRP, true) }}: + ConnectedServiceName: 'MicroBuild Signing Task (DevDiv)' + ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}: + ConnectedPMEServiceName: 6cc74545-d7b9-4050-9dfa-ebefcc8961ea + ${{ else }}: + ConnectedPMEServiceName: 248d384a-b39b-46e3-8ad5-c2c210d5e7ca + microbuildEnv: + TeamName: $(_TeamName) + MicroBuildOutputFolderOverride: $(Agent.TempDirectory)/MicroBuild + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + continueOnError: ${{ parameters.continueOnError }} + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT'), in(variables['_SignType'], 'real', 'test')) - $MBSIGN_APPFOLDER = $package[0].FullName + '/build' - $MBSIGN_APPFOLDER | Write-Host - $SignConfigPath = $MBSIGN_APPFOLDER + '/signconfig.xml' - Copy-Item -Path "$(MBSIGN_APPFOLDER)/signconfig.xml" -Destination $SignConfigPath -Force - condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux')) + - ${{ if eq(parameters.enableMicrobuildForMacAndLinux, true) }}: + - template: /eng/common/core-templates/steps/install-microbuild-impl.yml@self + parameters: + enablePreviewMicrobuild: ${{ parameters.enablePreviewMicrobuild }} + microbuildTaskInputs: + signType: $(_SignType) + zipSources: false + feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json + version: ${{ parameters.microbuildPluginVersion }} + ${{ if eq(parameters.microbuildUseESRP, true) }}: + ConnectedServiceName: 'MicroBuild Signing Task (DevDiv)' + ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}: + ConnectedPMEServiceName: 
beb8cb23-b303-4c95-ab26-9e44bc958d39 + ${{ else }}: + ConnectedPMEServiceName: c24de2a5-cc7a-493d-95e4-8e5ff5cad2bc + microbuildEnv: + TeamName: $(_TeamName) + MicroBuildOutputFolderOverride: $(Agent.TempDirectory)/MicroBuild + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + continueOnError: ${{ parameters.continueOnError }} + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT'), eq(variables['_SignType'], 'real')) diff --git a/eng/common/core-templates/steps/publish-logs.yml b/eng/common/core-templates/steps/publish-logs.yml index de24d0087c..10f825e270 100644 --- a/eng/common/core-templates/steps/publish-logs.yml +++ b/eng/common/core-templates/steps/publish-logs.yml @@ -12,22 +12,22 @@ steps: inputs: targetType: inline script: | - New-Item -ItemType Directory $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ - Move-Item -Path $(Build.SourcesDirectory)/artifacts/log/Debug/* $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ + New-Item -ItemType Directory $(System.DefaultWorkingDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ + Move-Item -Path $(System.DefaultWorkingDirectory)/artifacts/log/Debug/* $(System.DefaultWorkingDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ continueOnError: true condition: always() - task: PowerShell@2 displayName: Redact Logs inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/redact-logs.ps1 + filePath: $(System.DefaultWorkingDirectory)/eng/common/post-build/redact-logs.ps1 # For now this needs to have explicit list of all sensitive data. 
Taken from eng/publishing/v3/publish.yml - # Sensitive data can as well be added to $(Build.SourcesDirectory)/eng/BinlogSecretsRedactionFile.txt' + # Sensitive data can as well be added to $(System.DefaultWorkingDirectory)/eng/BinlogSecretsRedactionFile.txt' # If the file exists - sensitive data for redaction will be sourced from it # (single entry per line, lines starting with '# ' are considered comments and skipped) - arguments: -InputPath '$(Build.SourcesDirectory)/PostBuildLogs' + arguments: -InputPath '$(System.DefaultWorkingDirectory)/PostBuildLogs' -BinlogToolVersion ${{parameters.BinlogToolVersion}} - -TokensFilePath '$(Build.SourcesDirectory)/eng/BinlogSecretsRedactionFile.txt' + -TokensFilePath '$(System.DefaultWorkingDirectory)/eng/BinlogSecretsRedactionFile.txt' '$(publishing-dnceng-devdiv-code-r-build-re)' '$(MaestroAccessToken)' '$(dn-bot-all-orgs-artifact-feeds-rw)' @@ -44,7 +44,7 @@ steps: - task: CopyFiles@2 displayName: Gather post build logs inputs: - SourceFolder: '$(Build.SourcesDirectory)/PostBuildLogs' + SourceFolder: '$(System.DefaultWorkingDirectory)/PostBuildLogs' Contents: '**' TargetFolder: '$(Build.ArtifactStagingDirectory)/PostBuildLogs' condition: always() diff --git a/eng/common/core-templates/steps/source-build.yml b/eng/common/core-templates/steps/source-build.yml index 8c88ccd7b0..acf16ed349 100644 --- a/eng/common/core-templates/steps/source-build.yml +++ b/eng/common/core-templates/steps/source-build.yml @@ -19,19 +19,6 @@ steps: set -x df -h - # If file changes are detected, set CopyWipIntoInnerSourceBuildRepo to copy the WIP changes into the inner source build repo. - internalRestoreArgs= - if ! git diff --quiet; then - internalRestoreArgs='/p:CopyWipIntoInnerSourceBuildRepo=true' - # The 'Copy WIP' feature of source build uses git stash to apply changes from the original repo. - # This only works if there is a username/email configured, which won't be the case in most CI runs. - git config --get user.email - if [ $? 
-ne 0 ]; then - git config user.email dn-bot@microsoft.com - git config user.name dn-bot - fi - fi - # If building on the internal project, the internal storage variable may be available (usually only if needed) # In that case, add variables to allow the download of internal runtimes if the specified versions are not found # in the default public locations. @@ -46,36 +33,11 @@ steps: buildConfig='$(_BuildConfig)' fi - officialBuildArgs= - if [ '${{ and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}' = 'True' ]; then - officialBuildArgs='/p:DotNetPublishUsingPipelines=true /p:OfficialBuildId=$(BUILD.BUILDNUMBER)' - fi - targetRidArgs= if [ '${{ parameters.platform.targetRID }}' != '' ]; then targetRidArgs='/p:TargetRid=${{ parameters.platform.targetRID }}' fi - runtimeOsArgs= - if [ '${{ parameters.platform.runtimeOS }}' != '' ]; then - runtimeOsArgs='/p:RuntimeOS=${{ parameters.platform.runtimeOS }}' - fi - - baseOsArgs= - if [ '${{ parameters.platform.baseOS }}' != '' ]; then - baseOsArgs='/p:BaseOS=${{ parameters.platform.baseOS }}' - fi - - publishArgs= - if [ '${{ parameters.platform.skipPublishValidation }}' != 'true' ]; then - publishArgs='--publish' - fi - - assetManifestFileName=SourceBuild_RidSpecific.xml - if [ '${{ parameters.platform.name }}' != '' ]; then - assetManifestFileName=SourceBuild_${{ parameters.platform.name }}.xml - fi - portableBuildArgs= if [ '${{ parameters.platform.portableBuild }}' != '' ]; then portableBuildArgs='/p:PortableBuild=${{ parameters.platform.portableBuild }}' @@ -83,52 +45,21 @@ steps: ${{ coalesce(parameters.platform.buildScript, './build.sh') }} --ci \ --configuration $buildConfig \ - --restore --build --pack $publishArgs -bl \ + --restore --build --pack -bl \ + --source-build \ ${{ parameters.platform.buildArguments }} \ - $officialBuildArgs \ $internalRuntimeDownloadArgs \ - $internalRestoreArgs \ $targetRidArgs \ - $runtimeOsArgs \ - $baseOsArgs \ $portableBuildArgs 
\ - /p:DotNetBuildSourceOnly=true \ - /p:DotNetBuildRepo=true \ - /p:AssetManifestFileName=$assetManifestFileName displayName: Build -# Upload build logs for diagnosis. -- task: CopyFiles@2 - displayName: Prepare BuildLogs staging directory - inputs: - SourceFolder: '$(Build.SourcesDirectory)' - Contents: | - **/*.log - **/*.binlog - artifacts/sb/prebuilt-report/** - TargetFolder: '$(Build.StagingDirectory)/BuildLogs' - CleanTargetFolder: true - continueOnError: true - condition: succeededOrFailed() - - template: /eng/common/core-templates/steps/publish-pipeline-artifacts.yml parameters: is1ESPipeline: ${{ parameters.is1ESPipeline }} args: displayName: Publish BuildLogs - targetPath: '$(Build.StagingDirectory)/BuildLogs' + targetPath: artifacts/log/${{ coalesce(variables._BuildConfig, 'Release') }} artifactName: BuildLogs_SourceBuild_${{ parameters.platform.name }}_Attempt$(System.JobAttempt) continueOnError: true condition: succeededOrFailed() sbomEnabled: false # we don't need SBOM for logs - -# Manually inject component detection so that we can ignore the source build upstream cache, which contains -# a nupkg cache of input packages (a local feed). 
-# This path must match the upstream cache path in property 'CurrentRepoSourceBuiltNupkgCacheDir' -# in src\Microsoft.DotNet.Arcade.Sdk\tools\SourceBuild\SourceBuildArcade.targets -- template: /eng/common/core-templates/steps/component-governance.yml - parameters: - displayName: Component Detection (Exclude upstream cache) - is1ESPipeline: ${{ parameters.is1ESPipeline }} - componentGovernanceIgnoreDirectories: '$(Build.SourcesDirectory)/artifacts/sb/src/artifacts/obj/source-built-upstream-cache' - disableComponentGovernance: ${{ eq(variables['System.TeamProject'], 'public') }} diff --git a/eng/common/core-templates/steps/source-index-stage1-publish.yml b/eng/common/core-templates/steps/source-index-stage1-publish.yml index 99c2326fc1..eff4573c6e 100644 --- a/eng/common/core-templates/steps/source-index-stage1-publish.yml +++ b/eng/common/core-templates/steps/source-index-stage1-publish.yml @@ -1,15 +1,15 @@ parameters: - sourceIndexUploadPackageVersion: 2.0.0-20250425.2 - sourceIndexProcessBinlogPackageVersion: 1.0.1-20250425.2 + sourceIndexUploadPackageVersion: 2.0.0-20250906.1 + sourceIndexProcessBinlogPackageVersion: 1.0.1-20250906.1 sourceIndexPackageSource: https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json binlogPath: artifacts/log/Debug/Build.binlog steps: - task: UseDotNet@2 - displayName: "Source Index: Use .NET 8 SDK" + displayName: "Source Index: Use .NET 9 SDK" inputs: packageType: sdk - version: 8.0.x + version: 9.0.x installationPath: $(Agent.TempDirectory)/dotnet workingDirectory: $(Agent.TempDirectory) @@ -20,7 +20,7 @@ steps: # Set working directory to temp directory so 'dotnet' doesn't try to use global.json and use the repo's sdk. 
workingDirectory: $(Agent.TempDirectory) -- script: $(Agent.TempDirectory)/.source-index/tools/BinLogToSln -i ${{parameters.BinlogPath}} -r $(Build.SourcesDirectory) -n $(Build.Repository.Name) -o .source-index/stage1output +- script: $(Agent.TempDirectory)/.source-index/tools/BinLogToSln -i ${{parameters.BinlogPath}} -r $(System.DefaultWorkingDirectory) -n $(Build.Repository.Name) -o .source-index/stage1output displayName: "Source Index: Process Binlog into indexable sln" - ${{ if and(ne(parameters.runAsPublic, 'true'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: diff --git a/eng/common/cross/build-rootfs.sh b/eng/common/cross/build-rootfs.sh index d6f005b5da..8abfb71f72 100755 --- a/eng/common/cross/build-rootfs.sh +++ b/eng/common/cross/build-rootfs.sh @@ -295,8 +295,8 @@ while :; do ;; noble) # Ubuntu 24.04 __CodeName=noble - if [[ -n "$__LLDB_Package" ]]; then - __LLDB_Package="liblldb-18-dev" + if [[ -z "$__LLDB_Package" ]]; then + __LLDB_Package="liblldb-19-dev" fi ;; stretch) # Debian 9 diff --git a/eng/common/darc-init.sh b/eng/common/darc-init.sh index 36dbd45e1c..9f5ad6b763 100755 --- a/eng/common/darc-init.sh +++ b/eng/common/darc-init.sh @@ -5,7 +5,7 @@ darcVersion='' versionEndpoint='https://maestro.dot.net/api/assets/darc-version?api-version=2020-02-20' verbosity='minimal' -while [[ $# > 0 ]]; do +while [[ $# -gt 0 ]]; do opt="$(echo "$1" | tr "[:upper:]" "[:lower:]")" case "$opt" in --darcversion) @@ -68,7 +68,7 @@ function InstallDarcCli { fi fi - local arcadeServicesSource="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" + local arcadeServicesSource="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json" echo "Installing Darc CLI version $darcVersion..." echo "You may need to restart your command shell if this is the first dotnet tool you have installed." 
diff --git a/eng/common/dotnet-install.sh b/eng/common/dotnet-install.sh index 7b9d97e3bd..61f302bb67 100755 --- a/eng/common/dotnet-install.sh +++ b/eng/common/dotnet-install.sh @@ -18,7 +18,7 @@ architecture='' runtime='dotnet' runtimeSourceFeed='' runtimeSourceFeedKey='' -while [[ $# > 0 ]]; do +while [[ $# -gt 0 ]]; do opt="$(echo "$1" | tr "[:upper:]" "[:lower:]")" case "$opt" in -version|-v) diff --git a/eng/common/dotnet.cmd b/eng/common/dotnet.cmd new file mode 100644 index 0000000000..527fa4bb38 --- /dev/null +++ b/eng/common/dotnet.cmd @@ -0,0 +1,7 @@ +@echo off + +:: This script is used to install the .NET SDK. +:: It will also invoke the SDK with any provided arguments. + +powershell -ExecutionPolicy ByPass -NoProfile -command "& """%~dp0dotnet.ps1""" %*" +exit /b %ErrorLevel% diff --git a/eng/common/dotnet.ps1 b/eng/common/dotnet.ps1 new file mode 100644 index 0000000000..45e5676c9e --- /dev/null +++ b/eng/common/dotnet.ps1 @@ -0,0 +1,11 @@ +# This script is used to install the .NET SDK. +# It will also invoke the SDK with any provided arguments. + +. $PSScriptRoot\tools.ps1 +$dotnetRoot = InitializeDotNetCli -install:$true + +# Invoke acquired SDK with args if they are provided +if ($args.count -gt 0) { + $env:DOTNET_NOLOGO=1 + & "$dotnetRoot\dotnet.exe" $args +} diff --git a/eng/common/dotnet.sh b/eng/common/dotnet.sh new file mode 100644 index 0000000000..f6d24871c1 --- /dev/null +++ b/eng/common/dotnet.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# This script is used to install the .NET SDK. +# It will also invoke the SDK with any provided arguments. 
+ +source="${BASH_SOURCE[0]}" +# resolve $SOURCE until the file is no longer a symlink +while [[ -h $source ]]; do + scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + source="$(readlink "$source")" + + # if $source was a relative symlink, we need to resolve it relative to the path where the + # symlink file was located + [[ $source != /* ]] && source="$scriptroot/$source" +done +scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + +source $scriptroot/tools.sh +InitializeDotNetCli true # install + +# Invoke acquired SDK with args if they are provided +if [[ $# -gt 0 ]]; then + __dotnetDir=${_InitializeDotNetCli} + dotnetPath=${__dotnetDir}/dotnet + ${dotnetPath} "$@" +fi diff --git a/eng/common/generate-locproject.ps1 b/eng/common/generate-locproject.ps1 index 524aaa57f2..fa1cdc2b30 100644 --- a/eng/common/generate-locproject.ps1 +++ b/eng/common/generate-locproject.ps1 @@ -33,15 +33,27 @@ $jsonTemplateFiles | ForEach-Object { $jsonWinformsTemplateFiles = Get-ChildItem -Recurse -Path "$SourcesDirectory" | Where-Object { $_.FullName -Match "en\\strings\.json" } # current winforms pattern +$wxlFilesV3 = @() +$wxlFilesV5 = @() $wxlFiles = Get-ChildItem -Recurse -Path "$SourcesDirectory" | Where-Object { $_.FullName -Match "\\.+\.wxl" -And -Not( $_.Directory.Name -Match "\d{4}" ) } # localized files live in four digit lang ID directories; this excludes them if (-not $wxlFiles) { $wxlEnFiles = Get-ChildItem -Recurse -Path "$SourcesDirectory" | Where-Object { $_.FullName -Match "\\1033\\.+\.wxl" } # pick up en files (1033 = en) specifically so we can copy them to use as the neutral xlf files if ($wxlEnFiles) { - $wxlFiles = @() - $wxlEnFiles | ForEach-Object { - $destinationFile = "$($_.Directory.Parent.FullName)\$($_.Name)" - $wxlFiles += Copy-Item "$($_.FullName)" -Destination $destinationFile -PassThru - } + $wxlFiles = @() + $wxlEnFiles | ForEach-Object { + $destinationFile = "$($_.Directory.Parent.FullName)\$($_.Name)" + $content = Get-Content 
$_.FullName -Raw + + # Split files on schema to select different parser settings in the generated project. + if ($content -like "*http://wixtoolset.org/schemas/v4/wxl*") + { + $wxlFilesV5 += Copy-Item $_.FullName -Destination $destinationFile -PassThru + } + elseif ($content -like "*http://schemas.microsoft.com/wix/2006/localization*") + { + $wxlFilesV3 += Copy-Item $_.FullName -Destination $destinationFile -PassThru + } + } } } @@ -114,7 +126,32 @@ $locJson = @{ CloneLanguageSet = "WiX_CloneLanguages" LssFiles = @( "wxl_loc.lss" ) LocItems = @( - $wxlFiles | ForEach-Object { + $wxlFilesV3 | ForEach-Object { + $outputPath = "$($_.Directory.FullName | Resolve-Path -Relative)\" + $continue = $true + foreach ($exclusion in $exclusions.Exclusions) { + if ($_.FullName.Contains($exclusion)) { + $continue = $false + } + } + $sourceFile = ($_.FullName | Resolve-Path -Relative) + if ($continue) + { + return @{ + SourceFile = $sourceFile + CopyOption = "LangIDOnPath" + OutputPath = $outputPath + } + } + } + ) + }, + @{ + LanguageSet = $LanguageSet + CloneLanguageSet = "WiX_CloneLanguages" + LssFiles = @( "P210WxlSchemaV4.lss" ) + LocItems = @( + $wxlFilesV5 | ForEach-Object { $outputPath = "$($_.Directory.FullName | Resolve-Path -Relative)\" $continue = $true foreach ($exclusion in $exclusions.Exclusions) { diff --git a/eng/common/internal-feed-operations.sh b/eng/common/internal-feed-operations.sh index 9378223ba0..6299e7effd 100755 --- a/eng/common/internal-feed-operations.sh +++ b/eng/common/internal-feed-operations.sh @@ -100,7 +100,7 @@ operation='' authToken='' repoName='' -while [[ $# > 0 ]]; do +while [[ $# -gt 0 ]]; do opt="$(echo "$1" | tr "[:upper:]" "[:lower:]")" case "$opt" in --operation) diff --git a/eng/common/internal/NuGet.config b/eng/common/internal/NuGet.config index 19d3d311b1..f70261ed68 100644 --- a/eng/common/internal/NuGet.config +++ b/eng/common/internal/NuGet.config @@ -4,4 +4,7 @@ + + + diff --git a/eng/common/native/init-os-and-arch.sh 
b/eng/common/native/init-os-and-arch.sh old mode 100644 new mode 100755 diff --git a/eng/common/native/install-dependencies.sh b/eng/common/native/install-dependencies.sh old mode 100644 new mode 100755 index 477a44f335..f7bd4af0c8 --- a/eng/common/native/install-dependencies.sh +++ b/eng/common/native/install-dependencies.sh @@ -30,6 +30,8 @@ case "$os" in elif [ "$ID" = "fedora" ] || [ "$ID" = "rhel" ] || [ "$ID" = "azurelinux" ]; then pkg_mgr="$(command -v tdnf 2>/dev/null || command -v dnf)" $pkg_mgr install -y cmake llvm lld lldb clang python curl libicu-devel openssl-devel krb5-devel lttng-ust-devel pigz cpio + elif [ "$ID" = "amzn" ]; then + dnf install -y cmake llvm lld lldb clang python libicu-devel openssl-devel krb5-devel lttng-ust-devel pigz cpio elif [ "$ID" = "alpine" ]; then apk add build-base cmake bash curl clang llvm-dev lld lldb krb5-dev lttng-ust-dev icu-dev openssl-dev pigz cpio else diff --git a/eng/common/post-build/nuget-verification.ps1 b/eng/common/post-build/nuget-verification.ps1 index a365194a93..ac5c69ffca 100644 --- a/eng/common/post-build/nuget-verification.ps1 +++ b/eng/common/post-build/nuget-verification.ps1 @@ -30,7 +30,7 @@ [CmdletBinding(PositionalBinding = $false)] param( [string]$NuGetExePath, - [string]$PackageSource = "https://api.nuget.org/v3/index.json", + [string]$PackageSource = "https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-public/nuget/v3/index.json", [string]$DownloadPath, [Parameter(ValueFromRemainingArguments = $true)] [string[]]$args diff --git a/eng/common/post-build/publish-using-darc.ps1 b/eng/common/post-build/publish-using-darc.ps1 index 1eda208a3b..48e55598bd 100644 --- a/eng/common/post-build/publish-using-darc.ps1 +++ b/eng/common/post-build/publish-using-darc.ps1 @@ -7,7 +7,9 @@ param( [Parameter(Mandatory=$false)][string] $ArtifactsPublishingAdditionalParameters, [Parameter(Mandatory=$false)][string] $SymbolPublishingAdditionalParameters, 
[Parameter(Mandatory=$false)][string] $RequireDefaultChannels, - [Parameter(Mandatory=$false)][string] $SkipAssetsPublishing + [Parameter(Mandatory=$false)][string] $SkipAssetsPublishing, + [Parameter(Mandatory=$false)][string] $runtimeSourceFeed, + [Parameter(Mandatory=$false)][string] $runtimeSourceFeedKey ) try { diff --git a/eng/common/sdk-task.ps1 b/eng/common/sdk-task.ps1 index a9d2a2d269..9ae443f1c3 100644 --- a/eng/common/sdk-task.ps1 +++ b/eng/common/sdk-task.ps1 @@ -7,13 +7,16 @@ Param( [switch] $restore, [switch] $prepareMachine, [switch][Alias('nobl')]$excludeCIBinaryLog, + [switch]$noWarnAsError, [switch] $help, + [string] $runtimeSourceFeed = '', + [string] $runtimeSourceFeedKey = '', [Parameter(ValueFromRemainingArguments=$true)][String[]]$properties ) $ci = $true $binaryLog = if ($excludeCIBinaryLog) { $false } else { $true } -$warnAsError = $true +$warnAsError = if ($noWarnAsError) { $false } else { $true } . $PSScriptRoot\tools.ps1 @@ -67,7 +70,7 @@ try { $GlobalJson.tools | Add-Member -Name "vs" -Value (ConvertFrom-Json "{ `"version`": `"16.5`" }") -MemberType NoteProperty } if( -not ($GlobalJson.tools.PSObject.Properties.Name -match "xcopy-msbuild" )) { - $GlobalJson.tools | Add-Member -Name "xcopy-msbuild" -Value "17.13.0" -MemberType NoteProperty + $GlobalJson.tools | Add-Member -Name "xcopy-msbuild" -Value "17.14.16" -MemberType NoteProperty } if ($GlobalJson.tools."xcopy-msbuild".Trim() -ine "none") { $xcopyMSBuildToolsFolder = InitializeXCopyMSBuild $GlobalJson.tools."xcopy-msbuild" -install $true diff --git a/eng/common/sdk-task.sh b/eng/common/sdk-task.sh index 2f83adc026..3270f83fa9 100644 --- a/eng/common/sdk-task.sh +++ b/eng/common/sdk-task.sh @@ -10,6 +10,7 @@ show_usage() { echo "Advanced settings:" echo " --excludeCIBinarylog Don't output binary log (short: -nobl)" + echo " --noWarnAsError Do not warn as error" echo "" echo "Command line arguments not listed above are passed thru to msbuild." 
} @@ -52,6 +53,7 @@ exclude_ci_binary_log=false restore=false help=false properties='' +warnAsError=true while (($# > 0)); do lowerI="$(echo $1 | tr "[:upper:]" "[:lower:]")" @@ -73,6 +75,10 @@ while (($# > 0)); do exclude_ci_binary_log=true shift 1 ;; + --noWarnAsError) + warnAsError=false + shift 1 + ;; --help) help=true shift 1 @@ -85,7 +91,6 @@ while (($# > 0)); do done ci=true -warnAsError=true if $help; then show_usage diff --git a/eng/common/template-guidance.md b/eng/common/template-guidance.md index 98bbc1ded0..4bf4cf41bd 100644 --- a/eng/common/template-guidance.md +++ b/eng/common/template-guidance.md @@ -50,7 +50,7 @@ extends: - task: CopyFiles@2 displayName: Gather build output inputs: - SourceFolder: '$(Build.SourcesDirectory)/artifacts/marvel' + SourceFolder: '$(System.DefaultWorkingDirectory)/artifacts/marvel' Contents: '**' TargetFolder: '$(Build.ArtifactStagingDirectory)/artifacts/marvel' ``` diff --git a/eng/common/templates-official/job/job.yml b/eng/common/templates-official/job/job.yml index a8a9432874..92a0664f56 100644 --- a/eng/common/templates-official/job/job.yml +++ b/eng/common/templates-official/job/job.yml @@ -3,7 +3,7 @@ parameters: enableSbom: true runAsPublic: false PackageVersion: 9.0.0 - BuildDropPath: '$(Build.SourcesDirectory)/artifacts' + BuildDropPath: '$(System.DefaultWorkingDirectory)/artifacts' jobs: - template: /eng/common/core-templates/job/job.yml diff --git a/eng/common/templates-official/variables/sdl-variables.yml b/eng/common/templates-official/variables/sdl-variables.yml index dbdd66d4a4..f1311bbb1b 100644 --- a/eng/common/templates-official/variables/sdl-variables.yml +++ b/eng/common/templates-official/variables/sdl-variables.yml @@ -4,4 +4,4 @@ variables: - name: DefaultGuardianVersion value: 0.109.0 - name: GuardianPackagesConfigFile - value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config \ No newline at end of file + value: $(System.DefaultWorkingDirectory)\eng\common\sdl\packages.config \ No newline 
at end of file diff --git a/eng/common/templates/job/job.yml b/eng/common/templates/job/job.yml index 7cbf668c22..238fa0818f 100644 --- a/eng/common/templates/job/job.yml +++ b/eng/common/templates/job/job.yml @@ -6,7 +6,7 @@ parameters: enableSbom: true runAsPublic: false PackageVersion: 9.0.0 - BuildDropPath: '$(Build.SourcesDirectory)/artifacts' + BuildDropPath: '$(System.DefaultWorkingDirectory)/artifacts' jobs: - template: /eng/common/core-templates/job/job.yml @@ -77,7 +77,7 @@ jobs: parameters: is1ESPipeline: false args: - targetPath: '$(Build.SourcesDirectory)\eng\common\BuildConfiguration' + targetPath: '$(System.DefaultWorkingDirectory)\eng\common\BuildConfiguration' artifactName: 'BuildConfiguration' displayName: 'Publish build retry configuration' continueOnError: true diff --git a/eng/common/templates/steps/vmr-sync.yml b/eng/common/templates/steps/vmr-sync.yml new file mode 100644 index 0000000000..599afb6186 --- /dev/null +++ b/eng/common/templates/steps/vmr-sync.yml @@ -0,0 +1,207 @@ +### These steps synchronize new code from product repositories into the VMR (https://github.com/dotnet/dotnet). +### They initialize the darc CLI and pull the new updates. +### Changes are applied locally onto the already cloned VMR (located in $vmrPath). + +parameters: +- name: targetRef + displayName: Target revision in dotnet/ to synchronize + type: string + default: $(Build.SourceVersion) + +- name: vmrPath + displayName: Path where the dotnet/dotnet is checked out to + type: string + default: $(Agent.BuildDirectory)/vmr + +- name: additionalSyncs + displayName: Optional list of package names whose repo's source will also be synchronized in the local VMR, e.g. 
NuGet.Protocol + type: object + default: [] + +steps: +- checkout: vmr + displayName: Clone dotnet/dotnet + path: vmr + clean: true + +- checkout: self + displayName: Clone $(Build.Repository.Name) + path: repo + fetchDepth: 0 + +# This step is needed so that when we get a detached HEAD / shallow clone, +# we still pull the commit into the temporary repo clone to use it during the sync. +# Also unshallow the clone so that forwardflow command would work. +- script: | + git branch repo-head + git rev-parse HEAD + displayName: Label PR commit + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + vmr_sha=$(grep -oP '(?<=Sha=")[^"]*' $(Agent.BuildDirectory)/repo/eng/Version.Details.xml) + echo "##vso[task.setvariable variable=vmr_sha]$vmr_sha" + displayName: Obtain the vmr sha from Version.Details.xml (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- powershell: | + [xml]$xml = Get-Content -Path $(Agent.BuildDirectory)/repo/eng/Version.Details.xml + $vmr_sha = $xml.SelectSingleNode("//Source").Sha + Write-Output "##vso[task.setvariable variable=vmr_sha]$vmr_sha" + displayName: Obtain the vmr sha from Version.Details.xml (Windows) + condition: eq(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + git fetch --all + git checkout $(vmr_sha) + displayName: Checkout VMR at correct sha for repo flow + workingDirectory: ${{ parameters.vmrPath }} + +- script: | + git config --global user.name "dotnet-maestro[bot]" + git config --global user.email "dotnet-maestro[bot]@users.noreply.github.com" + displayName: Set git author to dotnet-maestro[bot] + workingDirectory: ${{ parameters.vmrPath }} + +- script: | + ./eng/common/vmr-sync.sh \ + --vmr ${{ parameters.vmrPath }} \ + --tmp $(Agent.TempDirectory) \ + --azdev-pat '$(dn-bot-all-orgs-code-r)' \ + --ci \ + --debug + + if [ "$?" 
-ne 0 ]; then + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + fi + displayName: Sync repo into VMR (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- script: | + git config --global diff.astextplain.textconv echo + git config --system core.longpaths true + displayName: Configure Windows git (longpaths, astextplain) + condition: eq(variables['Agent.OS'], 'Windows_NT') + +- powershell: | + ./eng/common/vmr-sync.ps1 ` + -vmr ${{ parameters.vmrPath }} ` + -tmp $(Agent.TempDirectory) ` + -azdevPat '$(dn-bot-all-orgs-code-r)' ` + -ci ` + -debugOutput + + if ($LASTEXITCODE -ne 0) { + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + } + displayName: Sync repo into VMR (Windows) + condition: eq(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + +- ${{ if eq(variables['Build.Reason'], 'PullRequest') }}: + - task: CopyFiles@2 + displayName: Collect failed patches + condition: failed() + inputs: + SourceFolder: '$(Agent.TempDirectory)' + Contents: '*.patch' + TargetFolder: '$(Build.ArtifactStagingDirectory)/FailedPatches' + + - publish: '$(Build.ArtifactStagingDirectory)/FailedPatches' + artifact: $(System.JobDisplayName)_FailedPatches + displayName: Upload failed patches + condition: failed() + +- ${{ each assetName in parameters.additionalSyncs }}: + # The vmr-sync script ends up staging files in the local VMR so we have to commit those + - script: + git commit --allow-empty -am "Forward-flow $(Build.Repository.Name)" + displayName: Commit local VMR changes + workingDirectory: ${{ parameters.vmrPath }} + + - script: | + set -ex + + echo "Searching for details of asset ${{ assetName }}..." 
+ + # Use darc to get dependencies information + dependencies=$(./.dotnet/dotnet darc get-dependencies --name '${{ assetName }}' --ci) + + # Extract repository URL and commit hash + repository=$(echo "$dependencies" | grep 'Repo:' | sed 's/Repo:[[:space:]]*//' | head -1) + + if [ -z "$repository" ]; then + echo "##vso[task.logissue type=error]Asset ${{ assetName }} not found in the dependency list" + exit 1 + fi + + commit=$(echo "$dependencies" | grep 'Commit:' | sed 's/Commit:[[:space:]]*//' | head -1) + + echo "Updating the VMR from $repository / $commit..." + cd .. + git clone $repository ${{ assetName }} + cd ${{ assetName }} + git checkout $commit + git branch "sync/$commit" + + ./eng/common/vmr-sync.sh \ + --vmr ${{ parameters.vmrPath }} \ + --tmp $(Agent.TempDirectory) \ + --azdev-pat '$(dn-bot-all-orgs-code-r)' \ + --ci \ + --debug + + if [ "$?" -ne 0 ]; then + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + fi + displayName: Sync ${{ assetName }} into (Unix) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo + + - powershell: | + $ErrorActionPreference = 'Stop' + + Write-Host "Searching for details of asset ${{ assetName }}..." + + $dependencies = .\.dotnet\dotnet darc get-dependencies --name '${{ assetName }}' --ci + + $repository = $dependencies | Select-String -Pattern 'Repo:\s+([^\s]+)' | Select-Object -First 1 + $repository -match 'Repo:\s+([^\s]+)' | Out-Null + $repository = $matches[1] + + if ($repository -eq $null) { + Write-Error "Asset ${{ assetName }} not found in the dependency list" + exit 1 + } + + $commit = $dependencies | Select-String -Pattern 'Commit:\s+([^\s]+)' | Select-Object -First 1 + $commit -match 'Commit:\s+([^\s]+)' | Out-Null + $commit = $matches[1] + + Write-Host "Updating the VMR from $repository / $commit..." + cd .. 
+ git clone $repository ${{ assetName }} + cd ${{ assetName }} + git checkout $commit + git branch "sync/$commit" + + .\eng\common\vmr-sync.ps1 ` + -vmr ${{ parameters.vmrPath }} ` + -tmp $(Agent.TempDirectory) ` + -azdevPat '$(dn-bot-all-orgs-code-r)' ` + -ci ` + -debugOutput + + if ($LASTEXITCODE -ne 0) { + echo "##vso[task.logissue type=error]Failed to synchronize the VMR" + exit 1 + } + displayName: Sync ${{ assetName }} into (Windows) + condition: ne(variables['Agent.OS'], 'Windows_NT') + workingDirectory: $(Agent.BuildDirectory)/repo diff --git a/eng/common/templates/vmr-build-pr.yml b/eng/common/templates/vmr-build-pr.yml new file mode 100644 index 0000000000..ce3c29a62f --- /dev/null +++ b/eng/common/templates/vmr-build-pr.yml @@ -0,0 +1,42 @@ +# This pipeline is used for running the VMR verification of the PR changes in repo-level PRs. +# +# It will run a full set of verification jobs defined in: +# https://github.com/dotnet/dotnet/blob/10060d128e3f470e77265f8490f5e4f72dae738e/eng/pipelines/templates/stages/vmr-build.yml#L27-L38 +# +# For repos that do not need to run the full set, you would do the following: +# +# 1. Copy this YML file to a repo-specific location, i.e. outside of eng/common. +# +# 2. 
Add `verifications` parameter to VMR template reference +# +# Examples: +# - For source-build stage 1 verification, add the following: +# verifications: [ "source-build-stage1" ] +# +# - For Windows only verifications, add the following: +# verifications: [ "unified-build-windows-x64", "unified-build-windows-x86" ] + +trigger: none +pr: none + +variables: +- template: /eng/common/templates/variables/pool-providers.yml@self + +- name: skipComponentGovernanceDetection # we run CG on internal builds only + value: true + +- name: Codeql.Enabled # we run CodeQL on internal builds only + value: false + +resources: + repositories: + - repository: vmr + type: github + name: dotnet/dotnet + endpoint: dotnet + +stages: +- template: /eng/pipelines/templates/stages/vmr-build.yml@vmr + parameters: + isBuiltFromVmr: false + scope: lite diff --git a/eng/common/tools.ps1 b/eng/common/tools.ps1 index 7373e53054..4bc50bd568 100644 --- a/eng/common/tools.ps1 +++ b/eng/common/tools.ps1 @@ -65,10 +65,8 @@ $ErrorActionPreference = 'Stop' # Base-64 encoded SAS token that has permission to storage container described by $runtimeSourceFeed [string]$runtimeSourceFeedKey = if (Test-Path variable:runtimeSourceFeedKey) { $runtimeSourceFeedKey } else { $null } -# True if the build is a product build -[bool]$productBuild = if (Test-Path variable:productBuild) { $productBuild } else { $false } - -[String[]]$properties = if (Test-Path variable:properties) { $properties } else { @() } +# True when the build is running within the VMR. 
+[bool]$fromVMR = if (Test-Path variable:fromVMR) { $fromVMR } else { $false } function Create-Directory ([string[]] $path) { New-Item -Path $path -Force -ItemType 'Directory' | Out-Null @@ -259,7 +257,20 @@ function Retry($downloadBlock, $maxRetries = 5) { function GetDotNetInstallScript([string] $dotnetRoot) { $installScript = Join-Path $dotnetRoot 'dotnet-install.ps1' + $shouldDownload = $false + if (!(Test-Path $installScript)) { + $shouldDownload = $true + } else { + # Check if the script is older than 30 days + $fileAge = (Get-Date) - (Get-Item $installScript).LastWriteTime + if ($fileAge.Days -gt 30) { + Write-Host "Existing install script is too old, re-downloading..." + $shouldDownload = $true + } + } + + if ($shouldDownload) { Create-Directory $dotnetRoot $ProgressPreference = 'SilentlyContinue' # Don't display the console progress UI - it's a huge perf hit $uri = "https://builds.dotnet.microsoft.com/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.ps1" @@ -383,8 +394,8 @@ function InitializeVisualStudioMSBuild([bool]$install, [object]$vsRequirements = # If the version of msbuild is going to be xcopied, # use this version. Version matches a package here: - # https://dev.azure.com/dnceng/public/_artifacts/feed/dotnet-eng/NuGet/Microsoft.DotNet.Arcade.MSBuild.Xcopy/versions/17.13.0 - $defaultXCopyMSBuildVersion = '17.13.0' + # https://dev.azure.com/dnceng/public/_artifacts/feed/dotnet-eng/NuGet/Microsoft.DotNet.Arcade.MSBuild.Xcopy/versions/17.14.16 + $defaultXCopyMSBuildVersion = '17.14.16' if (!$vsRequirements) { if (Get-Member -InputObject $GlobalJson.tools -Name 'vs') { @@ -416,7 +427,7 @@ function InitializeVisualStudioMSBuild([bool]$install, [object]$vsRequirements = # Locate Visual Studio installation or download x-copy msbuild. 
$vsInfo = LocateVisualStudio $vsRequirements - if ($vsInfo -ne $null) { + if ($vsInfo -ne $null -and $env:ForceUseXCopyMSBuild -eq $null) { # Ensure vsInstallDir has a trailing slash $vsInstallDir = Join-Path $vsInfo.installationPath "\" $vsMajorVersion = $vsInfo.installationVersion.Split('.')[0] @@ -533,7 +544,8 @@ function LocateVisualStudio([object]$vsRequirements = $null){ if (Get-Member -InputObject $GlobalJson.tools -Name 'vswhere') { $vswhereVersion = $GlobalJson.tools.vswhere } else { - $vswhereVersion = '2.5.2' + # keep this in sync with the VSWhereVersion in DefaultVersions.props + $vswhereVersion = '3.1.7' } $vsWhereDir = Join-Path $ToolsDir "vswhere\$vswhereVersion" @@ -541,7 +553,8 @@ function LocateVisualStudio([object]$vsRequirements = $null){ if (!(Test-Path $vsWhereExe)) { Create-Directory $vsWhereDir - Write-Host 'Downloading vswhere' + Write-Host "Downloading vswhere $vswhereVersion" + $ProgressPreference = 'SilentlyContinue' # Don't display the console progress UI - it's a huge perf hit Retry({ Invoke-WebRequest "https://netcorenativeassets.blob.core.windows.net/resource-packages/external/windows/vswhere/$vswhereVersion/vswhere.exe" -OutFile $vswhereExe }) @@ -646,7 +659,6 @@ function GetNuGetPackageCachePath() { $env:NUGET_PACKAGES = Join-Path $env:UserProfile '.nuget\packages\' } else { $env:NUGET_PACKAGES = Join-Path $RepoRoot '.packages\' - $env:RESTORENOHTTPCACHE = $true } } @@ -768,28 +780,13 @@ function MSBuild() { $toolsetBuildProject = InitializeToolset $basePath = Split-Path -parent $toolsetBuildProject - $possiblePaths = @( - # new scripts need to work with old packages, so we need to look for the old names/versions - (Join-Path $basePath (Join-Path $buildTool.Framework 'Microsoft.DotNet.ArcadeLogging.dll')), - (Join-Path $basePath (Join-Path $buildTool.Framework 'Microsoft.DotNet.Arcade.Sdk.dll')), - - # This list doesn't need to be updated anymore and can eventually be removed. 
- (Join-Path $basePath (Join-Path net9.0 'Microsoft.DotNet.ArcadeLogging.dll')), - (Join-Path $basePath (Join-Path net9.0 'Microsoft.DotNet.Arcade.Sdk.dll')), - (Join-Path $basePath (Join-Path net8.0 'Microsoft.DotNet.ArcadeLogging.dll')), - (Join-Path $basePath (Join-Path net8.0 'Microsoft.DotNet.Arcade.Sdk.dll')) - ) - $selectedPath = $null - foreach ($path in $possiblePaths) { - if (Test-Path $path -PathType Leaf) { - $selectedPath = $path - break - } - } + $selectedPath = Join-Path $basePath (Join-Path $buildTool.Framework 'Microsoft.DotNet.ArcadeLogging.dll') + if (-not $selectedPath) { - Write-PipelineTelemetryError -Category 'Build' -Message 'Unable to find arcade sdk logger assembly.' + Write-PipelineTelemetryError -Category 'Build' -Message "Unable to find arcade sdk logger assembly: $selectedPath" ExitWithExitCode 1 } + $args += "/logger:$selectedPath" } @@ -852,8 +849,8 @@ function MSBuild-Core() { } # When running on Azure Pipelines, override the returned exit code to avoid double logging. - # Skip this when the build is a child of the VMR orchestrator build. - if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null -and !$productBuild -and -not($properties -like "*DotNetBuildRepo=true*")) { + # Skip this when the build is a child of the VMR build. + if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null -and !$fromVMR) { Write-PipelineSetResult -Result "Failed" -Message "msbuild execution failed." # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error diff --git a/eng/common/tools.sh b/eng/common/tools.sh index d51f300c77..c1841c9dfd 100755 --- a/eng/common/tools.sh +++ b/eng/common/tools.sh @@ -5,6 +5,9 @@ # CI mode - set to true on CI server for PR validation build or official build. 
ci=${ci:-false} +# Build mode +source_build=${source_build:-false} + # Set to true to use the pipelines logger which will enable Azure logging output. # https://github.com/Microsoft/azure-pipelines-tasks/blob/master/docs/authoring/commands.md # This flag is meant as a temporary opt-opt for the feature while validate it across @@ -58,7 +61,8 @@ use_installed_dotnet_cli=${use_installed_dotnet_cli:-true} dotnetInstallScriptVersion=${dotnetInstallScriptVersion:-'v1'} # True to use global NuGet cache instead of restoring packages to repository-local directory. -if [[ "$ci" == true ]]; then +# Keep in sync with NuGetPackageroot in Arcade SDK's RepositoryLayout.props. +if [[ "$ci" == true || "$source_build" == true ]]; then use_global_nuget_cache=${use_global_nuget_cache:-false} else use_global_nuget_cache=${use_global_nuget_cache:-true} @@ -68,8 +72,8 @@ fi runtime_source_feed=${runtime_source_feed:-''} runtime_source_feed_key=${runtime_source_feed_key:-''} -# True if the build is a product build -product_build=${product_build:-false} +# True when the build is running within the VMR. +from_vmr=${from_vmr:-false} # Resolve any symlinks in the given path. function ResolvePath { @@ -296,8 +300,29 @@ function GetDotNetInstallScript { local root=$1 local install_script="$root/dotnet-install.sh" local install_script_url="https://builds.dotnet.microsoft.com/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.sh" + local timestamp_file="$root/.dotnet-install.timestamp" + local should_download=false if [[ ! 
-a "$install_script" ]]; then + should_download=true + elif [[ -f "$timestamp_file" ]]; then + # Check if the script is older than 30 days using timestamp file + local download_time=$(cat "$timestamp_file" 2>/dev/null || echo "0") + local current_time=$(date +%s) + local age_seconds=$((current_time - download_time)) + + # 30 days = 30 * 24 * 60 * 60 = 2592000 seconds + if [[ $age_seconds -gt 2592000 ]]; then + echo "Existing install script is too old, re-downloading..." + should_download=true + fi + else + # No timestamp file exists, assume script is old and re-download + echo "No timestamp found for existing install script, re-downloading..." + should_download=true + fi + + if [[ "$should_download" == true ]]; then mkdir -p "$root" echo "Downloading '$install_script_url'" @@ -324,6 +349,9 @@ function GetDotNetInstallScript { ExitWithExitCode $exit_code } fi + + # Create timestamp file to track download time in seconds from epoch + date +%s > "$timestamp_file" fi # return value _GetDotNetInstallScript="$install_script" @@ -341,14 +369,12 @@ function InitializeBuildTool { _InitializeBuildToolCommand="msbuild" } -# Set RestoreNoHttpCache as a workaround for https://github.com/NuGet/Home/issues/3116 function GetNuGetPackageCachePath { if [[ -z ${NUGET_PACKAGES:-} ]]; then if [[ "$use_global_nuget_cache" == true ]]; then export NUGET_PACKAGES="$HOME/.nuget/packages/" else export NUGET_PACKAGES="$repo_root/.packages/" - export RESTORENOHTTPCACHE=true fi fi @@ -445,27 +471,13 @@ function MSBuild { fi local toolset_dir="${_InitializeToolset%/*}" - # new scripts need to work with old packages, so we need to look for the old names/versions - local selectedPath= - local possiblePaths=() - possiblePaths+=( "$toolset_dir/net/Microsoft.DotNet.ArcadeLogging.dll" ) - possiblePaths+=( "$toolset_dir/net/Microsoft.DotNet.Arcade.Sdk.dll" ) - - # This list doesn't need to be updated anymore and can eventually be removed. 
- possiblePaths+=( "$toolset_dir/net9.0/Microsoft.DotNet.ArcadeLogging.dll" ) - possiblePaths+=( "$toolset_dir/net9.0/Microsoft.DotNet.Arcade.Sdk.dll" ) - possiblePaths+=( "$toolset_dir/net8.0/Microsoft.DotNet.ArcadeLogging.dll" ) - possiblePaths+=( "$toolset_dir/net8.0/Microsoft.DotNet.Arcade.Sdk.dll" ) - for path in "${possiblePaths[@]}"; do - if [[ -f $path ]]; then - selectedPath=$path - break - fi - done + local selectedPath="$toolset_dir/net/Microsoft.DotNet.ArcadeLogging.dll" + if [[ -z "$selectedPath" ]]; then - Write-PipelineTelemetryError -category 'Build' "Unable to find arcade sdk logger assembly." + Write-PipelineTelemetryError -category 'Build' "Unable to find arcade sdk logger assembly: $selectedPath" ExitWithExitCode 1 fi + args+=( "-logger:$selectedPath" ) fi @@ -502,8 +514,8 @@ function MSBuild-Core { echo "Build failed with exit code $exit_code. Check errors above." # When running on Azure Pipelines, override the returned exit code to avoid double logging. - # Skip this when the build is a child of the VMR orchestrator build. - if [[ "$ci" == true && -n ${SYSTEM_TEAMPROJECT:-} && "$product_build" != true && "$properties" != *"DotNetBuildRepo=true"* ]]; then + # Skip this when the build is a child of the VMR build. + if [[ "$ci" == true && -n ${SYSTEM_TEAMPROJECT:-} && "$from_vmr" != true ]]; then Write-PipelineSetResult -result "Failed" -message "msbuild execution failed." # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error @@ -526,6 +538,7 @@ function GetDarc { fi "$eng_root/common/darc-init.sh" --toolpath "$darc_path" $version + darc_tool="$darc_path/darc" } # Returns a full path to an Arcade SDK task project file. 
diff --git a/eng/common/vmr-sync.ps1 b/eng/common/vmr-sync.ps1 new file mode 100644 index 0000000000..97302f3205 --- /dev/null +++ b/eng/common/vmr-sync.ps1 @@ -0,0 +1,138 @@ +<# +.SYNOPSIS + +This script is used for synchronizing the current repository into a local VMR. +It pulls the current repository's code into the specified VMR directory for local testing or +Source-Build validation. + +.DESCRIPTION + +The tooling used for synchronization will clone the VMR repository into a temporary folder if +it does not already exist. These clones can be reused in future synchronizations, so it is +recommended to dedicate a folder for this to speed up re-runs. + +.EXAMPLE + Synchronize current repository into a local VMR: + ./vmr-sync.ps1 -vmrDir "$HOME/repos/dotnet" -tmpDir "$HOME/repos/tmp" + +.PARAMETER tmpDir +Required. Path to the temporary folder where repositories will be cloned + +.PARAMETER vmrBranch +Optional. Branch of the 'dotnet/dotnet' repo to synchronize. The VMR will be checked out to this branch + +.PARAMETER azdevPat +Optional. Azure DevOps PAT to use for cloning private repositories. + +.PARAMETER vmrDir +Optional. Path to the dotnet/dotnet repository. When null, gets cloned to the temporary folder + +.PARAMETER debugOutput +Optional. Enables debug logging in the darc vmr command. + +.PARAMETER ci +Optional. Denotes that the script is running in a CI environment. 
+#> +param ( + [Parameter(Mandatory=$true, HelpMessage="Path to the temporary folder where repositories will be cloned")] + [string][Alias('t', 'tmp')]$tmpDir, + [string][Alias('b', 'branch')]$vmrBranch, + [string]$remote, + [string]$azdevPat, + [string][Alias('v', 'vmr')]$vmrDir, + [switch]$ci, + [switch]$debugOutput +) + +function Fail { + Write-Host "> $($args[0])" -ForegroundColor 'Red' +} + +function Highlight { + Write-Host "> $($args[0])" -ForegroundColor 'Cyan' +} + +$verbosity = 'verbose' +if ($debugOutput) { + $verbosity = 'debug' +} +# Validation + +if (-not $tmpDir) { + Fail "Missing -tmpDir argument. Please specify the path to the temporary folder where the repositories will be cloned" + exit 1 +} + +# Sanitize the input + +if (-not $vmrDir) { + $vmrDir = Join-Path $tmpDir 'dotnet' +} + +if (-not (Test-Path -Path $tmpDir -PathType Container)) { + New-Item -ItemType Directory -Path $tmpDir | Out-Null +} + +# Prepare the VMR + +if (-not (Test-Path -Path $vmrDir -PathType Container)) { + Highlight "Cloning 'dotnet/dotnet' into $vmrDir.." + git clone https://github.com/dotnet/dotnet $vmrDir + + if ($vmrBranch) { + git -C $vmrDir switch -c $vmrBranch + } +} +else { + if ((git -C $vmrDir diff --quiet) -eq $false) { + Fail "There are changes in the working tree of $vmrDir. Please commit or stash your changes" + exit 1 + } + + if ($vmrBranch) { + Highlight "Preparing $vmrDir" + git -C $vmrDir checkout $vmrBranch + git -C $vmrDir pull + } +} + +Set-StrictMode -Version Latest + +# Prepare darc + +Highlight 'Installing .NET, preparing the tooling..' +. .\eng\common\tools.ps1 +$dotnetRoot = InitializeDotNetCli -install:$true +$darc = Get-Darc +$dotnet = "$dotnetRoot\dotnet.exe" + +Highlight "Starting the synchronization of VMR.." 
+ +# Synchronize the VMR +$darcArgs = ( + "vmr", "forwardflow", + "--tmp", $tmpDir, + "--$verbosity", + $vmrDir +) + +if ($ci) { + $darcArgs += ("--ci") +} + +if ($azdevPat) { + $darcArgs += ("--azdev-pat", $azdevPat) +} + +& "$darc" $darcArgs + +if ($LASTEXITCODE -eq 0) { + Highlight "Synchronization succeeded" +} +else { + Fail "Synchronization of repo to VMR failed!" + Fail "'$vmrDir' is left in its last state (re-run of this script will reset it)." + Fail "Please inspect the logs which contain path to the failing patch file (use -debugOutput to get all the details)." + Fail "Once you make changes to the conflicting VMR patch, commit it locally and re-run this script." + exit 1 +} diff --git a/eng/common/vmr-sync.sh b/eng/common/vmr-sync.sh new file mode 100644 index 0000000000..44239e331c --- /dev/null +++ b/eng/common/vmr-sync.sh @@ -0,0 +1,207 @@ +#!/bin/bash + +### This script is used for synchronizing the current repository into a local VMR. +### It pulls the current repository's code into the specified VMR directory for local testing or +### Source-Build validation. +### +### The tooling used for synchronization will clone the VMR repository into a temporary folder if +### it does not already exist. These clones can be reused in future synchronizations, so it is +### recommended to dedicate a folder for this to speed up re-runs. +### +### USAGE: +### Synchronize current repository into a local VMR: +### ./vmr-sync.sh --tmp "$HOME/repos/tmp" "$HOME/repos/dotnet" +### +### Options: +### -t, --tmp, --tmp-dir PATH +### Required. Path to the temporary folder where repositories will be cloned +### +### -b, --branch, --vmr-branch BRANCH_NAME +### Optional. Branch of the 'dotnet/dotnet' repo to synchronize. The VMR will be checked out to this branch +### +### --debug +### Optional. Turns on the most verbose logging for the VMR tooling +### +### --remote name:URI +### Optional. 
Additional remote to use during the synchronization +### This can be used to synchronize to a commit from a fork of the repository +### Example: 'runtime:https://github.com/yourfork/runtime' +### +### --azdev-pat +### Optional. Azure DevOps PAT to use for cloning private repositories. +### +### -v, --vmr, --vmr-dir PATH +### Optional. Path to the dotnet/dotnet repository. When null, gets cloned to the temporary folder + +source="${BASH_SOURCE[0]}" + +# resolve $source until the file is no longer a symlink +while [[ -h "$source" ]]; do + scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + source="$(readlink "$source")" + # if $source was a relative symlink, we need to resolve it relative to the path where the + # symlink file was located + [[ $source != /* ]] && source="$scriptroot/$source" +done +scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" + +function print_help () { + sed -n '/^### /,/^$/p' "$source" | cut -b 5- +} + +COLOR_RED=$(tput setaf 1 2>/dev/null || true) +COLOR_CYAN=$(tput setaf 6 2>/dev/null || true) +COLOR_CLEAR=$(tput sgr0 2>/dev/null || true) +COLOR_RESET=uniquesearchablestring +FAILURE_PREFIX='> ' + +function fail () { + echo "${COLOR_RED}$FAILURE_PREFIX${1//${COLOR_RESET}/${COLOR_RED}}${COLOR_CLEAR}" >&2 +} + +function highlight () { + echo "${COLOR_CYAN}$FAILURE_PREFIX${1//${COLOR_RESET}/${COLOR_CYAN}}${COLOR_CLEAR}" +} + +tmp_dir='' +vmr_dir='' +vmr_branch='' +additional_remotes='' +verbosity=verbose +azdev_pat='' +ci=false + +while [[ $# -gt 0 ]]; do + opt="$(echo "$1" | tr "[:upper:]" "[:lower:]")" + case "$opt" in + -t|--tmp|--tmp-dir) + tmp_dir=$2 + shift + ;; + -v|--vmr|--vmr-dir) + vmr_dir=$2 + shift + ;; + -b|--branch|--vmr-branch) + vmr_branch=$2 + shift + ;; + --remote) + additional_remotes="$additional_remotes $2" + shift + ;; + --azdev-pat) + azdev_pat=$2 + shift + ;; + --ci) + ci=true + ;; + -d|--debug) + verbosity=debug + ;; + -h|--help) + print_help + exit 0 + ;; + *) + fail "Invalid argument: $1" + print_help + 
exit 1 + ;; + esac + + shift +done + +# Validation + +if [[ -z "$tmp_dir" ]]; then + fail "Missing --tmp-dir argument. Please specify the path to the temporary folder where the repositories will be cloned" + exit 1 +fi + +# Sanitize the input + +if [[ -z "$vmr_dir" ]]; then + vmr_dir="$tmp_dir/dotnet" +fi + +if [[ ! -d "$tmp_dir" ]]; then + mkdir -p "$tmp_dir" +fi + +if [[ "$verbosity" == "debug" ]]; then + set -x +fi + +# Prepare the VMR + +if [[ ! -d "$vmr_dir" ]]; then + highlight "Cloning 'dotnet/dotnet' into $vmr_dir.." + git clone https://github.com/dotnet/dotnet "$vmr_dir" + + if [[ -n "$vmr_branch" ]]; then + git -C "$vmr_dir" switch -c "$vmr_branch" + fi +else + if ! git -C "$vmr_dir" diff --quiet; then + fail "There are changes in the working tree of $vmr_dir. Please commit or stash your changes" + exit 1 + fi + + if [[ -n "$vmr_branch" ]]; then + highlight "Preparing $vmr_dir" + git -C "$vmr_dir" checkout "$vmr_branch" + git -C "$vmr_dir" pull + fi +fi + +set -e + +# Prepare darc + +highlight 'Installing .NET, preparing the tooling..' +source "./eng/common/tools.sh" +InitializeDotNetCli true +GetDarc +dotnetDir=$( cd ./.dotnet/; pwd -P ) +dotnet=$dotnetDir/dotnet + +highlight "Starting the synchronization of VMR.." +set +e + +if [[ -n "$additional_remotes" ]]; then + additional_remotes="--additional-remotes $additional_remotes" +fi + +if [[ -n "$azdev_pat" ]]; then + azdev_pat="--azdev-pat $azdev_pat" +fi + +ci_arg='' +if [[ "$ci" == "true" ]]; then + ci_arg="--ci" +fi + +# Synchronize the VMR + +export DOTNET_ROOT="$dotnetDir" + +"$darc_tool" vmr forwardflow \ + --tmp "$tmp_dir" \ + $azdev_pat \ + --$verbosity \ + $ci_arg \ + $additional_remotes \ + "$vmr_dir" + +if [[ $? == 0 ]]; then + highlight "Synchronization succeeded" +else + fail "Synchronization of repo to VMR failed!" + fail "'$vmr_dir' is left in its last state (re-run of this script will reset it)." 
+ fail "Please inspect the logs which contain path to the failing patch file (use --debug to get all the details)." + fail "Once you make changes to the conflicting VMR patch, commit it locally and re-run this script." + exit 1 +fi diff --git a/eng/helix.proj b/eng/helix.proj index f9314415e7..71cd4fb2aa 100644 --- a/eng/helix.proj +++ b/eng/helix.proj @@ -91,7 +91,7 @@ - + @@ -111,6 +111,9 @@ $(HelixPreCommands);export LD_LIBRARY_PATH=/opt/homebrew/opt/mono-libgdiplus/lib;ls /usr/lib;ls $HELIX_WORKITEM_ROOT;export KMP_DUPLICATE_LIB_OK=TRUE;otool -L $HELIX_WORKITEM_ROOT/runtimes/osx-x64/native/lib_lightgbm.dylib + + $(HelixPreCommands);export DYLD_LIBRARY_PATH=$HELIX_WORKITEM_ROOT:$DYLD_LIBRARY_PATH;export DYLD_FALLBACK_LIBRARY_PATH=$HELIX_WORKITEM_ROOT:$DYLD_FALLBACK_LIBRARY_PATH + $(HelixPreCommands);sudo apt update;sudo apt-get install libomp-dev libomp5 -y $HELIX_CORRELATION_PAYLOAD @@ -119,11 +122,10 @@ runTests.sh .\runTests.cmd - /usr/local/opt/libomp/lib/libiomp5.dylib;/usr/local/opt/libomp/lib/libomp.dylib; - /usr/local/opt/libomp/lib/libomp.dylib; + /usr/local/opt/libomp/lib/libomp.dylib; - + + @@ -141,6 +144,10 @@ + + + diff --git a/es-metadata.yml b/es-metadata.yml new file mode 100644 index 0000000000..822421fbce --- /dev/null +++ b/es-metadata.yml @@ -0,0 +1,8 @@ +schemaVersion: 0.0.1 +isProduction: false +accountableOwners: + service: 7a9b52f6-7805-416c-9390-343168c0cdb3 +routing: + defaultAreaPath: + org: devdiv + path: DevDiv\NET Libraries \ No newline at end of file diff --git a/global.json b/global.json index c5ad302e41..3f90cd3b8a 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,6 @@ { "tools": { - "dotnet": "10.0.100-preview.3.25201.16", + "dotnet": "10.0.100-rc.1.25451.107", "runtimes": { "dotnet": [ "$(DotNetRuntime60Version)", @@ -13,8 +13,8 @@ } }, "msbuild-sdks": { - "Microsoft.DotNet.Arcade.Sdk": "10.0.0-beta.25225.4", - "Microsoft.DotNet.Helix.Sdk": "10.0.0-beta.25225.4", + "Microsoft.DotNet.Arcade.Sdk": "11.0.0-beta.25524.1", + 
"Microsoft.DotNet.Helix.Sdk": "11.0.0-beta.25524.1", "Microsoft.Build.NoTargets": "3.7.0", "Microsoft.Build.Traversal": "3.2.0" } diff --git a/src/Microsoft.Data.Analysis.Interactive/Microsoft.Data.Analysis.Interactive.csproj b/src/Microsoft.Data.Analysis.Interactive/Microsoft.Data.Analysis.Interactive.csproj index 5aeca57200..0654164255 100644 --- a/src/Microsoft.Data.Analysis.Interactive/Microsoft.Data.Analysis.Interactive.csproj +++ b/src/Microsoft.Data.Analysis.Interactive/Microsoft.Data.Analysis.Interactive.csproj @@ -6,8 +6,7 @@ - - + diff --git a/src/Microsoft.Data.Analysis/Computations/Arithmetic.net8.cs b/src/Microsoft.Data.Analysis/Computations/Arithmetic.net8.cs index e3854e1ce2..491846e2f7 100644 --- a/src/Microsoft.Data.Analysis/Computations/Arithmetic.net8.cs +++ b/src/Microsoft.Data.Analysis/Computations/Arithmetic.net8.cs @@ -510,7 +510,7 @@ protected override void ElementwiseGreaterThanOrEqual(ReadOnlySpan x, { for (var i = 0; i < x.Length; i++) { - destination[i] = (x[i] == y); + destination[i] = (x[i] >= y); } } diff --git a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj index aa3fd537f0..b1d2fb08ae 100644 --- a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj +++ b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj @@ -46,17 +46,11 @@ - - - - - - - + - + diff --git a/src/Microsoft.Extensions.ML/Microsoft.Extensions.ML.csproj b/src/Microsoft.Extensions.ML/Microsoft.Extensions.ML.csproj index 5dc6d360c5..c38263d75a 100644 --- a/src/Microsoft.Extensions.ML/Microsoft.Extensions.ML.csproj +++ b/src/Microsoft.Extensions.ML/Microsoft.Extensions.ML.csproj @@ -7,10 +7,8 @@ - - - - + + diff --git a/src/Microsoft.ML.AutoML.Interactive/Microsoft.ML.AutoML.Interactive.csproj b/src/Microsoft.ML.AutoML.Interactive/Microsoft.ML.AutoML.Interactive.csproj index 7144833fa2..72b8e5697e 100644 --- a/src/Microsoft.ML.AutoML.Interactive/Microsoft.ML.AutoML.Interactive.csproj +++ 
b/src/Microsoft.ML.AutoML.Interactive/Microsoft.ML.AutoML.Interactive.csproj @@ -11,9 +11,8 @@ - - - + + diff --git a/src/Microsoft.ML.AutoML/AutoMLExperiment/IMetricManager.cs b/src/Microsoft.ML.AutoML/AutoMLExperiment/IMetricManager.cs index 39e0b96073..135c093735 100644 --- a/src/Microsoft.ML.AutoML/AutoMLExperiment/IMetricManager.cs +++ b/src/Microsoft.ML.AutoML/AutoMLExperiment/IMetricManager.cs @@ -63,7 +63,7 @@ public double Evaluate(MLContext context, IDataView eval) BinaryClassificationMetric.PositivePrecision => metric.PositivePrecision, BinaryClassificationMetric.NegativePrecision => metric.NegativePrecision, BinaryClassificationMetric.NegativeRecall => metric.NegativeRecall, - BinaryClassificationMetric.PositiveRecall => metric.PositivePrecision, + BinaryClassificationMetric.PositiveRecall => metric.PositiveRecall, BinaryClassificationMetric.F1Score => metric.F1Score, _ => throw new NotImplementedException(), }; diff --git a/src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj b/src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj index d79ca0e5cf..497078735d 100644 --- a/src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj +++ b/src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj @@ -38,8 +38,6 @@ - - @@ -52,6 +50,12 @@ + + + + + + diff --git a/src/Microsoft.ML.AutoML/SweepableEstimator/Estimators/FastForest.cs b/src/Microsoft.ML.AutoML/SweepableEstimator/Estimators/FastForest.cs index c9e900f023..56bb421c88 100644 --- a/src/Microsoft.ML.AutoML/SweepableEstimator/Estimators/FastForest.cs +++ b/src/Microsoft.ML.AutoML/SweepableEstimator/Estimators/FastForest.cs @@ -13,6 +13,7 @@ public override IEstimator BuildFromOption(MLContext context, Fast var option = new FastForestBinaryTrainer.Options() { NumberOfTrees = param.NumberOfTrees, + NumberOfLeaves = param.NumberOfLeaves, LabelColumnName = param.LabelColumnName, FeatureColumnName = param.FeatureColumnName, ExampleWeightColumnName = param.ExampleWeightColumnName, @@ -31,6 +32,7 @@ public override IEstimator 
BuildFromOption(MLContext context, Fast var option = new FastForestRegressionTrainer.Options() { NumberOfTrees = param.NumberOfTrees, + NumberOfLeaves = param.NumberOfLeaves, FeatureFraction = param.FeatureFraction, LabelColumnName = param.LabelColumnName, FeatureColumnName = param.FeatureColumnName, diff --git a/src/Microsoft.ML.CodeGenerator/Microsoft.ML.CodeGenerator.csproj b/src/Microsoft.ML.CodeGenerator/Microsoft.ML.CodeGenerator.csproj index a468b72fde..84ea8106fd 100644 --- a/src/Microsoft.ML.CodeGenerator/Microsoft.ML.CodeGenerator.csproj +++ b/src/Microsoft.ML.CodeGenerator/Microsoft.ML.CodeGenerator.csproj @@ -17,8 +17,8 @@ - - + + diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index ff87dc413b..5685582d39 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -123,10 +123,14 @@ private static string GetShortTempDir(IExceptionContext ectx) string tempPath = ectx is IHostEnvironmentInternal iHostInternal ? 
iHostInternal.TempFilePath : Path.GetTempPath(); - int dirNumber = 0; - string mlNetTempDir = null!; - while (Directory.Exists(mlNetTempDir = Path.Combine(Path.GetFullPath(tempPath), $"ml_dotnet{dirNumber++}"))) ; - var path = Path.Combine(mlNetTempDir, Path.GetRandomFileName()); + + // Find a unique directory, the directory under Temp and must be unique to this process + string path = null; + while (path is null || Directory.Exists(path)) + { + path = Path.Combine(Path.GetFullPath(tempPath), $"ml_dotnet_{Path.GetFileNameWithoutExtension(Path.GetRandomFileName())}"); + } + Directory.CreateDirectory(path); return path; } diff --git a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj index 807fa85088..36429997c5 100644 --- a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj +++ b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj @@ -10,8 +10,7 @@ - - + diff --git a/src/Microsoft.ML.Core/Utilities/BinFinder.cs b/src/Microsoft.ML.Core/Utilities/BinFinder.cs index ed4265f356..b5b3adc699 100644 --- a/src/Microsoft.ML.Core/Utilities/BinFinder.cs +++ b/src/Microsoft.ML.Core/Utilities/BinFinder.cs @@ -555,7 +555,7 @@ internal sealed class DynamicBinFinder : BinFinderBase // the energy for (row, col), we only use energies for (row-1, colPrev) where colPrev <= col. // Thus a single row of values is sufficient. // REVIEW: Consider storing energies in reverse order to match the common access pattern. - // REVEIW: What should we use for the energy type? + // REVIEW: What should we use for the energy type? 
private EnergyType[] _energies; private EnergyType[] _energiesBest; diff --git a/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj b/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj index 8e06f2b846..b02ce08ecf 100644 --- a/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj +++ b/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj @@ -24,13 +24,12 @@ - - + \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj index d212f5e4ee..419a4060d0 100644 --- a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj +++ b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj @@ -7,10 +7,10 @@ - - - - + + + + diff --git a/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs b/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs index 383052e145..a0be7945d6 100644 --- a/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs +++ b/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs @@ -38,6 +38,11 @@ private protected sealed class BindingsImpl : BindingsBase public readonly int ScoreColumnIndex; // The type of the derived column. public readonly DataViewType PredColType; + /// + /// The name of the column that contains the predicted labels. + /// This field is used in the scoring process to store or reference the predicted label column. + /// + public readonly string PredictedLabelColumnName; // The ScoreColumnKind metadata value for all score columns. 
public readonly string ScoreColumnKind; @@ -54,6 +59,7 @@ private BindingsImpl(DataViewSchema input, ISchemaBoundRowMapper mapper, string ScoreColumnIndex = scoreColIndex; ScoreColumnKind = scoreColumnKind; PredColType = predColType; + PredictedLabelColumnName = predictedLabelColumnName; _getScoreColumnKind = GetScoreColumnKind; _getScoreValueKind = GetScoreValueKind; @@ -113,7 +119,7 @@ public BindingsImpl ApplyToSchema(DataViewSchema input, ISchemaBindableMapper bi bool tmp = rowMapper.OutputSchema.TryGetColumnIndex(scoreCol, out mapperScoreColumn); env.Check(tmp, "Mapper doesn't have expected score column"); - return new BindingsImpl(input, rowMapper, Suffix, ScoreColumnKind, true, mapperScoreColumn, PredColType); + return new BindingsImpl(input, rowMapper, Suffix, ScoreColumnKind, true, mapperScoreColumn, PredColType, PredictedLabelColumnName); } public static BindingsImpl Create(ModelLoadContext ctx, DataViewSchema input, diff --git a/src/Microsoft.ML.DataView/Microsoft.ML.DataView.csproj b/src/Microsoft.ML.DataView/Microsoft.ML.DataView.csproj index 59ac30649e..5e07b1fb5e 100644 --- a/src/Microsoft.ML.DataView/Microsoft.ML.DataView.csproj +++ b/src/Microsoft.ML.DataView/Microsoft.ML.DataView.csproj @@ -7,8 +7,7 @@ - - + diff --git a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj index 9ed5a2702e..363cc818a9 100644 --- a/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj +++ b/src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj @@ -12,11 +12,11 @@ - - - - - + + + + + diff --git a/src/Microsoft.ML.GenAI.Core/Module/GenAILinear.cs b/src/Microsoft.ML.GenAI.Core/Module/GenAILinear.cs index 178b8fddda..a9b2e39db5 100644 --- a/src/Microsoft.ML.GenAI.Core/Module/GenAILinear.cs +++ b/src/Microsoft.ML.GenAI.Core/Module/GenAILinear.cs @@ -6,6 +6,7 @@ using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Core; + internal class GenAILinear : nn.Module { #pragma warning disable 
MSML_GeneralName // This name should be PascalCased diff --git a/src/Microsoft.ML.GenAI.Core/Trainer/CasualLMSupervisedFineTuningTrainer.cs b/src/Microsoft.ML.GenAI.Core/Trainer/CausalLMSupervisedFineTuningTrainer.cs similarity index 93% rename from src/Microsoft.ML.GenAI.Core/Trainer/CasualLMSupervisedFineTuningTrainer.cs rename to src/Microsoft.ML.GenAI.Core/Trainer/CausalLMSupervisedFineTuningTrainer.cs index f5ee202cd5..ae447b60c4 100644 --- a/src/Microsoft.ML.GenAI.Core/Trainer/CasualLMSupervisedFineTuningTrainer.cs +++ b/src/Microsoft.ML.GenAI.Core/Trainer/CausalLMSupervisedFineTuningTrainer.cs @@ -13,12 +13,12 @@ namespace Microsoft.ML.GenAI.Core.Trainer; -public class CasualLMSupervisedFineTuningTrainer +public class CausalLMSupervisedFineTuningTrainer { - private readonly ILogger? _logger; + private readonly ILogger? _logger; private readonly ICausalLMPipeline _pipeline; - public CasualLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger? logger = null) + public CausalLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger? logger = null) { _logger = logger; _pipeline = pipeline; diff --git a/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs index b292c3d731..a2a3f92aea 100644 --- a/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs +++ b/src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs @@ -13,12 +13,12 @@ namespace Microsoft.ML.GenAI.Core; public class AttentionMaskConverter { - private readonly bool _isCasual; + private readonly bool _isCausal; private readonly int? _slidingWindow; public AttentionMaskConverter(bool isCausal, int? slidingWindow) { - this._isCasual = isCausal; + this._isCausal = isCausal; this._slidingWindow = slidingWindow; } @@ -42,8 +42,8 @@ public Tensor To4D( // create causal mask // [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - Tensor? 
casual4dMask = null; - if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCasual) + Tensor? causal4dMask = null; + if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCausal) { if (keyValueLength is null) { @@ -51,7 +51,7 @@ public Tensor To4D( } var pastKeyValuesLength = keyValueLength.Value - queryLength; - casual4dMask = MakeCasualMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow); + causal4dMask = MakeCausalMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow); } else if (this._slidingWindow is not null) { @@ -59,25 +59,25 @@ public Tensor To4D( } var expandedAttnMask = ExpandMask(attentionMask2d, dType, queryLength).to(attentionMask2d.device); - if (casual4dMask is not null) + if (causal4dMask is not null) { var min = torch.finfo(dType).min; - expandedAttnMask = casual4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min); + expandedAttnMask = causal4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min); } return expandedAttnMask; } - public Tensor? ToCasual4D( + public Tensor? ToCausal4D( int batchSize, int queryLength, int keyValueLength, ScalarType dType, Device device) { - if (!_isCasual) + if (!_isCausal) { - throw new ArgumentException("This is not a casual mask"); + throw new ArgumentException("This is not a causal mask"); } long[] inputShape = [batchSize, queryLength]; @@ -88,13 +88,13 @@ public Tensor To4D( Tensor? 
causal4DMask = null; if (queryLength > 1 || this._slidingWindow is int) { - causal4DMask = MakeCasualMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow); + causal4DMask = MakeCausalMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow); } return causal4DMask; } - public static Tensor MakeCasualMask( + public static Tensor MakeCausalMask( long[] inputIdsShape, ScalarType dType, Device device, @@ -158,7 +158,7 @@ public static Tensor MakeCasualMask( return converter.To4D(attentionMask, (int)inputShape[1], dType, keyValueLength); } - return converter.ToCasual4D(batchSize, queryLength, keyValueLength, dType, device); + return converter.ToCausal4D(batchSize, queryLength, keyValueLength, dType, device); } public static Tensor ExpandMask( diff --git a/src/Microsoft.ML.GenAI.Core/Utils.cs b/src/Microsoft.ML.GenAI.Core/Utils.cs index dccabad653..552500146a 100644 --- a/src/Microsoft.ML.GenAI.Core/Utils.cs +++ b/src/Microsoft.ML.GenAI.Core/Utils.cs @@ -14,6 +14,7 @@ using static TorchSharp.torch.nn; namespace Microsoft.ML.GenAI.Core; + public static class Utils { public static Tensor ApplyRotaryEmbeddings(Tensor input, Tensor freqsComplex) diff --git a/src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj b/src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj index 0e1207c3df..72bd38416c 100644 --- a/src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj +++ b/src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj @@ -12,8 +12,8 @@ - - + + diff --git a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj index f20b587631..df80da30fa 100644 --- a/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj +++ b/src/Microsoft.ML.GenAI.Mistral/Microsoft.ML.GenAI.Mistral.csproj @@ -12,8 +12,9 @@ - - + + + diff --git a/src/Microsoft.ML.GenAI.Phi/Extension/SemanticKernelExtension.cs 
b/src/Microsoft.ML.GenAI.Phi/Extension/SemanticKernelExtension.cs index ace7a7b425..82b4b759c5 100644 --- a/src/Microsoft.ML.GenAI.Phi/Extension/SemanticKernelExtension.cs +++ b/src/Microsoft.ML.GenAI.Phi/Extension/SemanticKernelExtension.cs @@ -15,7 +15,7 @@ public static class SemanticKernelExtension { public static IKernelBuilder AddGenAIChatCompletion( this IKernelBuilder builder, - ICausalLMPipeline pipeline) + ICausalLMPipeline pipeline) { builder.Services.AddSingleton(new Phi3CausalLMChatCompletionService(pipeline)); @@ -24,7 +24,7 @@ public static IKernelBuilder AddGenAIChatCompletion( public static IKernelBuilder AddGenAITextGeneration( this IKernelBuilder builder, - ICausalLMPipeline pipeline) + ICausalLMPipeline pipeline) { builder.Services.AddSingleton(new Phi3CausalLMTextGenerationService(pipeline)); diff --git a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj index d19dbe58fa..68021be630 100644 --- a/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj +++ b/src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj @@ -12,8 +12,9 @@ - - + + + diff --git a/src/Microsoft.ML.GenAI.Phi/Module/Phi2DecoderLayer.cs b/src/Microsoft.ML.GenAI.Phi/Module/Phi2DecoderLayer.cs index 7931e32b79..2ffae701de 100644 --- a/src/Microsoft.ML.GenAI.Phi/Module/Phi2DecoderLayer.cs +++ b/src/Microsoft.ML.GenAI.Phi/Module/Phi2DecoderLayer.cs @@ -7,6 +7,7 @@ using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Phi.Module; + public class Phi2DecoderLayer : nn.Module< Tensor, // hidden_states Tensor, // position_ids diff --git a/src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs b/src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs index b96e0409f9..05edab0985 100644 --- a/src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs +++ b/src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs @@ -80,7 +80,7 @@ public override (Tensor, Tensor?, Tensor?) 
forward( // use 4d attention mask if (attentionMask is not null) { - attentionMask = this.Prepare4DCasualAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype); + attentionMask = this.Prepare4DCausalAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype); } var hiddenStates = inputEmbeddings; @@ -100,7 +100,7 @@ public override (Tensor, Tensor?, Tensor?) forward( return (hiddenStates, null, null); } - private Tensor Prepare4DCasualAttentionMask( + private Tensor Prepare4DCausalAttentionMask( Tensor attentionMask, int queryLength, int pastKeyValueLength, @@ -110,11 +110,11 @@ private Tensor Prepare4DCasualAttentionMask( var seqLen = attentionMask.shape[1]; Contract.Assert(seqLen == queryLength, "seqLen must be equal to queryLength"); var targetLength = queryLength + pastKeyValueLength; - var casual4DMask = this.MakeCasualAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype); + var causal4DMask = this.MakeCausalAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype); var expandedMask = this.ExpandMask(attentionMask, dtype, queryLength).to(attentionMask.device); - casual4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min); - return casual4DMask; + causal4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min); + return causal4DMask; } private Tensor ExpandMask( @@ -132,7 +132,7 @@ private Tensor ExpandMask( return invertedMask.masked_fill(invertedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min); } - private Tensor MakeCasualAttentionMask( + private Tensor MakeCausalAttentionMask( int batchSize, int targetLen, int pastKeyValueLength, diff --git a/src/Microsoft.ML.GenAI.Phi/Module/Phi2RotaryEmbedding.cs b/src/Microsoft.ML.GenAI.Phi/Module/Phi2RotaryEmbedding.cs index a21ed4959e..6fd70dd9bd 100644 --- a/src/Microsoft.ML.GenAI.Phi/Module/Phi2RotaryEmbedding.cs +++ 
b/src/Microsoft.ML.GenAI.Phi/Module/Phi2RotaryEmbedding.cs @@ -6,6 +6,7 @@ using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Phi.Module; + internal class Phi2RotaryEmbedding : nn.Module< Tensor, // input int, // seq_len diff --git a/src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCasualLM.cs b/src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCausalLM.cs similarity index 91% rename from src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCasualLM.cs rename to src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCausalLM.cs index 1d49375565..3c8c5c0a5b 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCasualLM.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi2/Phi2ForCausalLM.cs @@ -14,15 +14,15 @@ namespace Microsoft.ML.GenAI.Phi; -public class Phi2ForCasualLM : nn.Module +public class Phi2ForCausalLM : nn.Module { #pragma warning disable MSML_PrivateFieldName // Private field name not in: _camelCase format private readonly Phi2Model model; private readonly GenAILinear lm_head; #pragma warning restore MSML_PrivateFieldName // Private field name not in: _camelCase format - public Phi2ForCasualLM(Phi2Config config) - : base(nameof(Phi2ForCasualLM)) + public Phi2ForCausalLM(Phi2Config config) + : base(nameof(Phi2ForCausalLM)) { this.model = new Phi2Model(config); this.lm_head = new GenAILinear(config.HiddenSize, config.VocabSize, dtype: config.Dtype); @@ -47,7 +47,7 @@ public override CausalLMModelOutput forward(CausalLMModelInput input) // use_cac return new CausalLMModelOutput(lastHiddenState: hiddenState, logits: lmLogits); } - public static Phi2ForCasualLM FromPretrained( + public static Phi2ForCausalLM FromPretrained( string modelFolder, string configName = "config.json", string checkPointName = "model.safetensors.index.json", @@ -58,7 +58,7 @@ public static Phi2ForCasualLM FromPretrained( var config = Path.Join(modelFolder, configName); var modelConfig = JsonSerializer.Deserialize(File.ReadAllText(config)) ?? 
throw new ArgumentNullException(nameof(config)); modelConfig.Dtype = torchDtype; - var wrapper = new Phi2ForCasualLM(modelConfig); + var wrapper = new Phi2ForCausalLM(modelConfig); var loadedParameters = new Dictionary(); wrapper.load_checkpoint(path: modelFolder, checkpointName: checkPointName, strict: true, loadedParameters: loadedParameters, useTqdm: useTqdm); wrapper = wrapper.to(device); diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Config.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Config.cs index 0a020d6724..9261c044f8 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Config.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Config.cs @@ -12,6 +12,7 @@ using static TorchSharp.torch; namespace Microsoft.ML.GenAI.Phi; + public class Phi3Config { public Phi3Config() diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs index 6971ac5991..e1e02d338b 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML.GenAI.Phi; public class Phi3Agent : IStreamingAgent { private const char Newline = '\n'; - private readonly ICausalLMPipeline _pipeline; + private readonly ICausalLMPipeline _pipeline; private readonly string? _systemMessage; private readonly IAutoGenChatTemplateBuilder _templateBuilder; public Phi3Agent( - ICausalLMPipeline pipeline, + ICausalLMPipeline pipeline, string name, string? systemMessage = "you are a helpful assistant", IAutoGenChatTemplateBuilder? 
templateBuilder = null) diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs index e297e40c5a..ebd74e4848 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs @@ -14,12 +14,12 @@ namespace Microsoft.ML.GenAI.Phi; -public class Phi3CausalLMChatClient : CausalLMPipelineChatClient +public class Phi3CausalLMChatClient : CausalLMPipelineChatClient { private readonly string _eotToken = "<|end|>"; public Phi3CausalLMChatClient( - ICausalLMPipeline pipeline, + ICausalLMPipeline pipeline, IMEAIChatTemplateBuilder? templateBuilder = null, ChatClientMetadata? metadata = null) : base( diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatCompletionService.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatCompletionService.cs index 1d95882655..896926c43b 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatCompletionService.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatCompletionService.cs @@ -14,12 +14,12 @@ namespace Microsoft.ML.GenAI.Phi; public class Phi3CausalLMChatCompletionService : IChatCompletionService { - private readonly ICausalLMPipeline _pipeline; + private readonly ICausalLMPipeline _pipeline; private readonly Phi3CausalLMTextGenerationService _textGenerationService; private readonly ISemanticKernelChatTemplateBuilder _templateBuilder; public Phi3CausalLMChatCompletionService( - ICausalLMPipeline pipeline, + ICausalLMPipeline pipeline, ISemanticKernelChatTemplateBuilder? 
templateBuilder = null) { _pipeline = pipeline; diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMTextGenerationService.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMTextGenerationService.cs index d4c8c34e85..01a574c8f6 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMTextGenerationService.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMTextGenerationService.cs @@ -12,10 +12,10 @@ namespace Microsoft.ML.GenAI.Phi; public class Phi3CausalLMTextGenerationService : ITextGenerationService { - private readonly ICausalLMPipeline _pipeline; + private readonly ICausalLMPipeline _pipeline; public Phi3CausalLMTextGenerationService( - ICausalLMPipeline pipeline) + ICausalLMPipeline pipeline) { _pipeline = pipeline; } diff --git a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCasualLM.cs b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCausalLM.cs similarity index 90% rename from src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCasualLM.cs rename to src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCausalLM.cs index a5840b242a..c46cbaf57e 100644 --- a/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCasualLM.cs +++ b/src/Microsoft.ML.GenAI.Phi/Phi3/Phi3ForCausalLM.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.GenAI.Phi; -public class Phi3ForCasualLM : nn.Module +public class Phi3ForCausalLM : nn.Module { private readonly Phi3Config _config; @@ -27,8 +27,8 @@ public class Phi3ForCasualLM : nn.Module(File.ReadAllText(config)) ?? 
throw new ArgumentNullException(nameof(config)); modelConfig.DType = torchDtype; - var phi = new Phi3ForCasualLM(modelConfig); + var phi = new Phi3ForCausalLM(modelConfig); phi.LoadSafeTensors(modelFolder, checkPointName); phi = phi.to(device); phi.eval(); @@ -67,7 +67,7 @@ public static Phi3ForCasualLM FromPretrained( return phi; } - public static Phi3ForCasualLM FromPretrained( + public static Phi3ForCausalLM FromPretrained( string modelFolder, string configName = "config.json", string checkPointName = "model.safetensors.index.json", @@ -87,7 +87,7 @@ public static Phi3ForCasualLM FromPretrained( var config = Path.Join(modelFolder, configName); var modelConfig = JsonSerializer.Deserialize(File.ReadAllText(config)) ?? throw new ArgumentNullException(nameof(config)); modelConfig.DType = torchDtype; - var model = new Phi3ForCasualLM(modelConfig); + var model = new Phi3ForCausalLM(modelConfig); if (quantizeToInt8) { @@ -105,7 +105,7 @@ public static Phi3ForCasualLM FromPretrained( ]); torch.set_default_device("cpu"); - model = new Phi3ForCasualLM(modelConfig); + model = new Phi3ForCausalLM(modelConfig); model.LoadSafeTensors(modelFolder, checkPointName); diff --git a/src/Microsoft.ML.GenAI.Phi/README.md b/src/Microsoft.ML.GenAI.Phi/README.md index 2daf51039e..0e0bbb4dda 100644 --- a/src/Microsoft.ML.GenAI.Phi/README.md +++ b/src/Microsoft.ML.GenAI.Phi/README.md @@ -25,7 +25,7 @@ git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct var weightFolder = "/path/to/Phi-3-mini-4k-instruct"; var configName = "config.json"; var config = JsonSerializier.Deserialize(File.ReadAllText(Path.Combine(weightFolder, configName))); -var model = new Phi3ForCasualLM(config); +var model = new Phi3ForCausalLM(config); // load tokenizer var tokenizerModelName = "tokenizer.model"; diff --git a/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj b/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj index ad687a0ad0..4f4213309b 100644 --- 
a/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj +++ b/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj @@ -8,12 +8,11 @@ - - + + - - + diff --git a/src/Microsoft.ML.LightGbm/Microsoft.ML.LightGbm.csproj b/src/Microsoft.ML.LightGbm/Microsoft.ML.LightGbm.csproj index 7b63f306d5..408cfd6816 100644 --- a/src/Microsoft.ML.LightGbm/Microsoft.ML.LightGbm.csproj +++ b/src/Microsoft.ML.LightGbm/Microsoft.ML.LightGbm.csproj @@ -18,7 +18,7 @@ - + diff --git a/src/Microsoft.ML.Mkl.Components/Microsoft.ML.Mkl.Components.csproj b/src/Microsoft.ML.Mkl.Components/Microsoft.ML.Mkl.Components.csproj index ae356239ac..e828bf59ff 100644 --- a/src/Microsoft.ML.Mkl.Components/Microsoft.ML.Mkl.Components.csproj +++ b/src/Microsoft.ML.Mkl.Components/Microsoft.ML.Mkl.Components.csproj @@ -21,7 +21,7 @@ - + \ No newline at end of file diff --git a/src/Microsoft.ML.OnnxConverter/Microsoft.ML.OnnxConverter.csproj b/src/Microsoft.ML.OnnxConverter/Microsoft.ML.OnnxConverter.csproj index 6182288560..bfd73f5ad8 100644 --- a/src/Microsoft.ML.OnnxConverter/Microsoft.ML.OnnxConverter.csproj +++ b/src/Microsoft.ML.OnnxConverter/Microsoft.ML.OnnxConverter.csproj @@ -8,7 +8,7 @@ - + diff --git a/src/Microsoft.ML.OnnxTransformer/Microsoft.ML.OnnxTransformer.csproj b/src/Microsoft.ML.OnnxTransformer/Microsoft.ML.OnnxTransformer.csproj index 2b0aefc490..b49cb42b08 100644 --- a/src/Microsoft.ML.OnnxTransformer/Microsoft.ML.OnnxTransformer.csproj +++ b/src/Microsoft.ML.OnnxTransformer/Microsoft.ML.OnnxTransformer.csproj @@ -13,8 +13,8 @@ - - + + diff --git a/src/Microsoft.ML.Parquet/Microsoft.ML.Parquet.csproj b/src/Microsoft.ML.Parquet/Microsoft.ML.Parquet.csproj index df7c11bb8b..715a45860a 100644 --- a/src/Microsoft.ML.Parquet/Microsoft.ML.Parquet.csproj +++ b/src/Microsoft.ML.Parquet/Microsoft.ML.Parquet.csproj @@ -7,7 +7,7 @@ - + diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj 
b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index 1698d12a23..f5c2037e4e 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -12,7 +12,7 @@ - + diff --git a/src/Microsoft.ML.SearchSpace/Microsoft.ML.SearchSpace.csproj b/src/Microsoft.ML.SearchSpace/Microsoft.ML.SearchSpace.csproj index adc0eb67eb..c0ba1d7f66 100644 --- a/src/Microsoft.ML.SearchSpace/Microsoft.ML.SearchSpace.csproj +++ b/src/Microsoft.ML.SearchSpace/Microsoft.ML.SearchSpace.csproj @@ -9,7 +9,7 @@ - + diff --git a/src/Microsoft.ML.TensorFlow/Microsoft.ML.TensorFlow.csproj b/src/Microsoft.ML.TensorFlow/Microsoft.ML.TensorFlow.csproj index d267d32217..00555d3950 100644 --- a/src/Microsoft.ML.TensorFlow/Microsoft.ML.TensorFlow.csproj +++ b/src/Microsoft.ML.TensorFlow/Microsoft.ML.TensorFlow.csproj @@ -9,10 +9,8 @@ - - - - + + diff --git a/src/Microsoft.ML.TensorFlow/TensorTypeExtensions.cs b/src/Microsoft.ML.TensorFlow/TensorTypeExtensions.cs index 330c398133..5742a5f8cb 100644 --- a/src/Microsoft.ML.TensorFlow/TensorTypeExtensions.cs +++ b/src/Microsoft.ML.TensorFlow/TensorTypeExtensions.cs @@ -25,7 +25,7 @@ public static void ToScalar(this Tensor tensor, ref T dst) where T : unmanage return; } - if (typeof(T).as_dtype() != tensor.dtype) + if (typeof(T).as_tf_dtype() != tensor.dtype) throw new NotSupportedException(); unsafe @@ -37,7 +37,7 @@ public static void ToScalar(this Tensor tensor, ref T dst) where T : unmanage public static void CopyTo(this Tensor tensor, Span values) where T : unmanaged { - if (typeof(T).as_dtype() != tensor.dtype) + if (typeof(T).as_tf_dtype() != tensor.dtype) throw new NotSupportedException(); unsafe diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index fd556a175f..231b132297 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ 
b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -16,8 +16,8 @@ using Microsoft.ML.Runtime; using Microsoft.ML.TensorFlow; using Microsoft.ML.Transforms; -using NumSharp; using Tensorflow; +using Tensorflow.NumPy; using static Microsoft.ML.TensorFlow.TensorFlowUtils; using static Tensorflow.Binding; using Utils = Microsoft.ML.Internal.Utilities.Utils; @@ -51,7 +51,7 @@ public sealed class TensorFlowTransformer : RowToRowTransformerBase, IDisposable internal readonly DataViewType[] OutputTypes; internal readonly TF_DataType[] TFOutputTypes; internal readonly TF_DataType[] TFInputTypes; - internal readonly TensorShape[] TFInputShapes; + internal readonly Shape[] TFInputShapes; internal readonly (Operation, int)[] TFInputOperations; internal readonly (Operation, int)[] TFOutputOperations; internal TF_Output[] TFInputNodes; @@ -212,14 +212,14 @@ internal TensorFlowTransformer(IHostEnvironment env, TensorFlowEstimator.Options env.CheckValue(options, nameof(options)); } - private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, bool isVector, int colIndex, TensorShape tfShape) + private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, bool isVector, int colIndex, Shape tfShape) { if (isVector) return new TensorValueGetterVec(input, colIndex, tfShape); return new TensorValueGetter(input, colIndex, tfShape); } - private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, TF_DataType tfType, bool isVector, int colIndex, TensorShape tfShape) + private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, TF_DataType tfType, bool isVector, int colIndex, Shape tfShape) { var type = Tf2MlNetType(tfType); return Utils.MarshalInvoke(CreateTensorValueGetter, type.RawType, input, isVector, colIndex, tfShape); @@ -230,7 +230,7 @@ private static ITensorValueGetter[] GetTensorValueGetters( int[] inputColIndices, bool[] isInputVector, TF_DataType[] tfInputTypes, - TensorShape[] tfInputShapes) + Shape[] 
tfInputShapes) { var srcTensorGetters = new ITensorValueGetter[inputColIndices.Length]; for (int i = 0; i < inputColIndices.Length; i++) @@ -331,10 +331,10 @@ private static (Operation, int) GetOperationFromName(string operation, Session s return (session.graph.OperationByName(operation), 0); } - internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Operation, int)[]) GetInputInfo(IHost host, Session session, string[] inputs, int batchSize = 1) + internal static (TF_DataType[] tfInputTypes, Shape[] tfInputShapes, (Operation, int)[]) GetInputInfo(IHost host, Session session, string[] inputs, int batchSize = 1) { var tfInputTypes = new TF_DataType[inputs.Length]; - var tfInputShapes = new TensorShape[inputs.Length]; + var tfInputShapes = new Shape[inputs.Length]; var tfInputOperations = new (Operation, int)[inputs.Length]; int index = 0; @@ -351,7 +351,7 @@ internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Opera throw host.ExceptParam(nameof(session), $"Input type '{tfInputType}' of input column '{input}' is not supported in TensorFlow"); tfInputTypes[index] = tfInputType; - tfInputShapes[index] = ((Tensor)inputTensor).TensorShape; + tfInputShapes[index] = ((Tensor)inputTensor).shape; tfInputOperations[index] = (inputTensor, inputTensorIndex); index++; } @@ -359,7 +359,7 @@ internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Opera return (tfInputTypes, tfInputShapes, tfInputOperations); } - internal static TensorShape GetTensorShape(TF_Output output, Graph graph, Status status = null) + internal static Shape GetTensorShape(TF_Output output, Graph graph, Status status = null) { if (graph == IntPtr.Zero) throw new ObjectDisposedException(nameof(graph)); @@ -370,12 +370,12 @@ internal static TensorShape GetTensorShape(TF_Output output, Graph graph, Status cstatus.Check(); if (n == -1) - return new TensorShape(new int[0]); + return new Shape(new int[0]); var dims = new long[n]; 
c_api.TF_GraphGetTensorShape(graph, output, dims, dims.Length, cstatus.Handle); cstatus.Check(); - return new TensorShape(dims.Select(x => (int)x).ToArray()); + return new Shape(dims.Select(x => (int)x).ToArray()); } internal static (TF_DataType[] tfOutputTypes, DataViewType[] outputTypes, (Operation, int)[]) GetOutputInfo(IHost host, Session session, string[] outputs, bool treatOutputAsBatched) @@ -402,7 +402,7 @@ internal static (TF_DataType[] tfOutputTypes, DataViewType[] outputTypes, (Opera // i.e. the first dimension (if unknown) is assumed to be batch dimension. // If there are other dimension that are unknown the transformer will return a variable length vector. // This is the work around in absence of reshape transformer. - var idims = shape.dims; + var idims = shape.dims.Select(x => checked((int)x)).ToArray(); int[] dims = idims; if (treatOutputAsBatched) @@ -517,7 +517,7 @@ public void Dispose() if (Session != null && Session != IntPtr.Zero) { - Session.close(); // invoked Dispose() + Session.Dispose(); } } finally @@ -536,7 +536,7 @@ private sealed class Mapper : MapperBase private readonly TensorFlowTransformer _parent; private readonly int[] _inputColIndices; private readonly bool[] _isInputVector; - private readonly TensorShape[] _fullySpecifiedShapes; + private readonly Shape[] _fullySpecifiedShapes; private readonly ConcurrentBag _runners; public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : @@ -546,7 +546,7 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : _parent = parent; _inputColIndices = new int[_parent.Inputs.Length]; _isInputVector = new bool[_parent.Inputs.Length]; - _fullySpecifiedShapes = new TensorShape[_parent.Inputs.Length]; + _fullySpecifiedShapes = new Shape[_parent.Inputs.Length]; for (int i = 0; i < _parent.Inputs.Length; i++) { if (!inputSchema.TryGetColumnIndex(_parent.Inputs[i], out _inputColIndices[i])) @@ -570,11 +570,11 @@ public Mapper(TensorFlowTransformer parent, 
DataViewSchema inputSchema) : { vecType = (VectorDataViewType)type; var colTypeDims = vecType.Dimensions.Select(dim => (int)dim).ToArray(); - _fullySpecifiedShapes[i] = new TensorShape(colTypeDims); + _fullySpecifiedShapes[i] = new Shape(colTypeDims); } else // for primitive type use default TensorShape - _fullySpecifiedShapes[i] = new TensorShape(); + _fullySpecifiedShapes[i] = new Shape(Array.Empty()); } else { @@ -582,7 +582,7 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : var colTypeDims = vecType.Dimensions.Select(dim => (int)dim).ToArray(); // If the column is one dimension we make sure that the total size of the TF shape matches. // Compute the total size of the known dimensions of the shape. - int valCount = 1; + long valCount = 1; int numOfUnkDim = 0; foreach (var s in shape) { @@ -592,7 +592,7 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : numOfUnkDim++; } // The column length should be divisible by this, so that the other dimensions can be integral. - int typeValueCount = type.GetValueCount(); + long typeValueCount = type.GetValueCount(); if (typeValueCount % valCount != 0) throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {typeValueCount}."); @@ -616,10 +616,10 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {typeValueCount}."); // Fill in the unknown dimensions. - var l = new int[originalShapeNdim]; + var l = new long[originalShapeNdim]; for (int ishape = 0; ishape < originalShapeNdim; ishape++) - l[ishape] = originalShapeDims[ishape] == -1 ? (int)d : originalShapeDims[ishape]; - _fullySpecifiedShapes[i] = new TensorShape(l); + l[ishape] = originalShapeDims[ishape] == -1 ? 
(long)d : originalShapeDims[ishape]; + _fullySpecifiedShapes[i] = new Shape(l); } if (_parent._addBatchDimensionInput) @@ -627,11 +627,11 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : // ndim of default TensorShape is -1, make originDim to 0 in this case. // after addBatchDimension, input column will be changed: type -> type[] var originDim = _fullySpecifiedShapes[i].ndim < 0 ? 0 : _fullySpecifiedShapes[i].ndim; - var l = new int[originDim + 1]; + var l = new long[originDim + 1]; l[0] = 1; for (int ishape = 1; ishape < l.Length; ishape++) l[ishape] = _fullySpecifiedShapes[i].dims[ishape - 1]; - _fullySpecifiedShapes[i] = new TensorShape(l); + _fullySpecifiedShapes[i] = new Shape(l); } } @@ -720,7 +720,7 @@ private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[ UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache); var tensor = outputCache.Outputs[_parent.Outputs[iinfo]]; - var tensorSize = tensor.TensorShape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); + var tensorSize = tensor.shape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); var editor = VBufferEditor.Create(ref dst, (int)tensorSize); FetchStringData(tensor, editor.Values); @@ -735,7 +735,7 @@ private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[ UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache); var tensor = outputCache.Outputs[_parent.Outputs[iinfo]]; - var tensorSize = tensor.TensorShape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); + var tensorSize = tensor.shape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); var editor = VBufferEditor.Create(ref dst, (int)tensorSize); @@ -821,10 +821,10 @@ private class TensorValueGetter : ITensorValueGetter { private readonly ValueGetter _srcgetter; private readonly T[] _bufferedData; - private readonly TensorShape _tfShape; + private readonly Shape _tfShape; private int _position; - public 
TensorValueGetter(DataViewRow input, int colIndex, TensorShape tfShape) + public TensorValueGetter(DataViewRow input, int colIndex, Shape tfShape) { _srcgetter = input.GetGetter(input.Schema[colIndex]); _tfShape = tfShape; @@ -864,7 +864,7 @@ public Tensor GetBufferedBatchTensor() private class TensorValueGetterVec : ITensorValueGetter { private readonly ValueGetter> _srcgetter; - private readonly TensorShape _tfShape; + private readonly Shape _tfShape; private VBuffer _vBuffer; private T[] _denseData; private T[] _bufferedData; @@ -872,7 +872,7 @@ private class TensorValueGetterVec : ITensorValueGetter private readonly long[] _dims; private readonly long _bufferedDataSize; - public TensorValueGetterVec(DataViewRow input, int colIndex, TensorShape tfShape) + public TensorValueGetterVec(DataViewRow input, int colIndex, Shape tfShape) { _srcgetter = input.GetGetter>(input.Schema[colIndex]); _tfShape = tfShape; diff --git a/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs b/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs index faec243057..bf724ab9a8 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.ComponentModel; using System.IO; using System.Linq; using System.Security.AccessControl; @@ -12,8 +13,8 @@ using Microsoft.ML.Runtime; using Microsoft.ML.TensorFlow; using Microsoft.ML.Transforms; -using NumSharp; using Tensorflow; +using Tensorflow.NumPy; using static Tensorflow.Binding; using Utils = Microsoft.ML.Internal.Utilities.Utils; @@ -77,9 +78,9 @@ internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph grap } // Construct the final ML.NET type of a Tensorflow variable. 
- var tensorShape = op.output.TensorShape.dims; + var dimensions = op.output.shape.dims?.Select(x => checked((int)x))?.ToArray(); - if (tensorShape == null) + if (dimensions == null) { // primitive column type schemaBuilder.AddColumn(op.name, mlType, metadataBuilder.ToAnnotations()); @@ -88,15 +89,15 @@ internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph grap { // vector column type DataViewType columnType = new VectorDataViewType(mlType); - if (!(Utils.Size(tensorShape) == 1 && tensorShape[0] <= 0) && - (Utils.Size(tensorShape) > 0 && tensorShape.Skip(1).All(x => x > 0))) + if (!(Utils.Size(dimensions) == 1 && dimensions[0] <= 0) && + (Utils.Size(dimensions) > 0 && dimensions.Skip(1).All(x => x > 0))) // treatOutputAsBatched == true means that if the first dimension is greater // than 0 we take the tensor shape as is. If the first value is less then 0, we treat it as the batch input so we can // ignore it for the shape of the ML.NET vector. I.E. if the input dimensions are [-1, 5], ML.NET will read the -1 as // batch input, and so the ML.NET data type will be a vector of length 5. if (treatOutputAsBatched) { - columnType = new VectorDataViewType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray()); + columnType = new VectorDataViewType(mlType, dimensions[0] > 0 ? dimensions : dimensions.Skip(1).ToArray()); } // When treatOutputAsBatched is false, if the first value is less than 0 we want to set it to 0. TensorFlow // represents an unknown size as -1, but ML.NET represents it as 0 so we need to convert it. @@ -104,9 +105,9 @@ internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph grap // data type will be a vector of 2 dimensions, where the first dimension is unknown and the second has a length of 5. 
else { - if (tensorShape[0] < 0) - tensorShape[0] = 0; - columnType = new VectorDataViewType(mlType, tensorShape); + if (dimensions[0] < 0) + dimensions[0] = 0; + columnType = new VectorDataViewType(mlType, dimensions); } schemaBuilder.AddColumn(op.name, columnType, metadataBuilder.ToAnnotations()); @@ -441,32 +442,32 @@ internal static bool IsTypeSupported(TF_DataType tfoutput) } } - internal static Tensor CastDataAndReturnAsTensor(T[] data, TensorShape tfShape) + internal static Tensor CastDataAndReturnAsTensor(T[] data, Shape tfShape) { var dims = tfShape.dims.Select(x => (long)x).ToArray(); if (typeof(T) == typeof(sbyte)) - return new Tensor((sbyte[])(object)data, dims, TF_DataType.TF_INT8); + return new Tensor((sbyte[])(object)data, dims); else if (typeof(T) == typeof(long)) - return new Tensor((long[])(object)data, dims, TF_DataType.TF_INT64); + return new Tensor((long[])(object)data, dims); else if (typeof(T) == typeof(Int32)) - return new Tensor((Int32[])(object)data, dims, TF_DataType.TF_INT32); + return new Tensor((Int32[])(object)data, dims); else if (typeof(T) == typeof(Int16)) - return new Tensor((Int16[])(object)data, dims, TF_DataType.TF_INT16); + return new Tensor((Int16[])(object)data, dims); else if (typeof(T) == typeof(byte)) - return new Tensor((byte[])(object)data, dims, TF_DataType.TF_UINT8); + return new Tensor((byte[])(object)data, dims); else if (typeof(T) == typeof(ulong)) - return new Tensor((ulong[])(object)data, dims, TF_DataType.TF_UINT64); + return new Tensor((ulong[])(object)data, dims); else if (typeof(T) == typeof(UInt32)) - return new Tensor((UInt32[])(object)data, dims, TF_DataType.TF_UINT32); + return new Tensor((UInt32[])(object)data, dims); else if (typeof(T) == typeof(UInt16)) - return new Tensor((UInt16[])(object)data, dims, TF_DataType.TF_UINT16); + return new Tensor((UInt16[])(object)data, dims); else if (typeof(T) == typeof(bool)) - return new Tensor((bool[])(object)data, dims, TF_DataType.TF_BOOL); + return new 
Tensor((bool[])(object)data, dims); else if (typeof(T) == typeof(float)) - return new Tensor((float[])(object)data, dims, TF_DataType.TF_FLOAT); + return new Tensor((float[])(object)data, dims); else if (typeof(T) == typeof(double)) - return new Tensor((double[])(object)data, dims, TF_DataType.TF_DOUBLE); + return new Tensor((double[])(object)data, dims); else if (typeof(T) == typeof(ReadOnlyMemory)) { string[] strings = new string[data.Length]; @@ -484,27 +485,30 @@ internal static Tensor CastDataAndReturnAsTensor(T[] data, TensorShape tfShap internal static Tensor CastDataAndReturnAsTensor(T data) { if (typeof(T) == typeof(sbyte)) - return new Tensor((sbyte)(object)data, TF_DataType.TF_INT8); + return new Tensor((sbyte)(object)data); else if (typeof(T) == typeof(long)) - return new Tensor((long)(object)data, TF_DataType.TF_INT64); + return new Tensor((long)(object)data); else if (typeof(T) == typeof(Int32)) - return new Tensor((Int32)(object)data, TF_DataType.TF_INT32); + return new Tensor((Int32)(object)data); else if (typeof(T) == typeof(Int16)) - return new Tensor((Int16)(object)data, TF_DataType.TF_INT16); + return new Tensor((Int16)(object)data); else if (typeof(T) == typeof(byte)) - return new Tensor((byte)(object)data, TF_DataType.TF_UINT8); + return new Tensor((byte)(object)data); else if (typeof(T) == typeof(ulong)) - return new Tensor((ulong)(object)data, TF_DataType.TF_UINT64); + return new Tensor((ulong)(object)data); else if (typeof(T) == typeof(UInt32)) - return new Tensor((UInt32)(object)data, TF_DataType.TF_UINT32); + return new Tensor((UInt32)(object)data); else if (typeof(T) == typeof(UInt16)) - return new Tensor((UInt16)(object)data, TF_DataType.TF_UINT16); +#pragma warning disable IDE0055 + // Tensorflow.NET v2.7 has no constructor for UInt16 so using the array version + return new Tensor(new UInt16[]{(UInt16)(object)data}); +#pragma warning restore IDE0055 else if (typeof(T) == typeof(bool)) - return new Tensor((bool)(object)data, 
TF_DataType.TF_BOOL); + return new Tensor((bool)(object)data); else if (typeof(T) == typeof(float)) - return new Tensor((float)(object)data, TF_DataType.TF_FLOAT); + return new Tensor((float)(object)data); else if (typeof(T) == typeof(double)) - return new Tensor((double)(object)data, TF_DataType.TF_DOUBLE); + return new Tensor((double)(object)data); else if (typeof(T) == typeof(ReadOnlyMemory)) return new Tensor(data.ToString()); @@ -556,7 +560,8 @@ public Runner AddInput(Tensor value, int index) { _inputTensors[index]?.Dispose(); _inputTensors[index] = value; - _inputValues[index] = value; + _inputValues[index] = value.Handle.DangerousGetHandle(); + return this; } @@ -613,7 +618,9 @@ public Tensor[] Run() _status.Check(true); for (int i = 0; i < _outputs.Length; i++) - _outputTensors[i] = new Tensor(_outputValues[i]); + { + _outputTensors[i] = new Tensor(new SafeTensorHandle(_outputValues[i])); + } return _outputTensors; } diff --git a/src/Microsoft.ML.Tokenizers/Microsoft.ML.Tokenizers.csproj b/src/Microsoft.ML.Tokenizers/Microsoft.ML.Tokenizers.csproj index 15166fdd1d..ffc5946c78 100644 --- a/src/Microsoft.ML.Tokenizers/Microsoft.ML.Tokenizers.csproj +++ b/src/Microsoft.ML.Tokenizers/Microsoft.ML.Tokenizers.csproj @@ -17,13 +17,13 @@ - + - - - + + + diff --git a/src/Microsoft.ML.Tokenizers/Model/BPETokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/BPETokenizer.cs index 9874709eb7..b9592d2e2b 100644 --- a/src/Microsoft.ML.Tokenizers/Model/BPETokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/BPETokenizer.cs @@ -132,6 +132,11 @@ public static BpeTokenizer Create( return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens); } + /// + /// Create a new Bpe tokenizer object to use for text encoding. + /// + /// The options used to create the Bpe tokenizer. + /// The Bpe tokenizer object. 
public static BpeTokenizer Create(BpeOptions options) { if (options is null) @@ -146,9 +151,9 @@ public static BpeTokenizer Create(BpeOptions options) Dictionary vocab = new Dictionary(1000); - foreach ((string token, int id) in options.Vocabulary) + foreach (KeyValuePair kvp in options.Vocabulary) { - vocab.Add(new StringSpanOrdinalKey(token), id); + vocab.Add(new StringSpanOrdinalKey(kvp.Key), kvp.Value); } if (vocab.Count == 0) @@ -315,7 +320,7 @@ private BpeTokenizer( if (beginningOfSentenceToken is not null) { - if (!_vocab.TryGetValue(beginningOfSentenceToken, out int aId)) + if (_vocab.TryGetValue(beginningOfSentenceToken, out int aId) is false && specialTokens?.TryGetValue(beginningOfSentenceToken, out aId) is false) { throw new InvalidOperationException($"The beginning of sentence token '{beginningOfSentenceToken}' was not present in the vocabulary."); } @@ -326,7 +331,7 @@ private BpeTokenizer( if (endOfSentenceToken is not null) { - if (!_vocab.TryGetValue(endOfSentenceToken, out int aId)) + if (_vocab.TryGetValue(endOfSentenceToken, out int aId) is false && specialTokens?.TryGetValue(endOfSentenceToken, out aId) is false) { throw new InvalidOperationException($"The end of sentence token '{endOfSentenceToken}' was not present in the vocabulary."); } @@ -395,7 +400,7 @@ private BpeTokenizer( /// /// Gets the optional beginning of sentence token. /// - internal string? BeginningOfSentenceToken { get; } + public string? BeginningOfSentenceToken { get; } /// /// The id of the beginning of sentence token. @@ -787,31 +792,30 @@ public string Decode(IEnumerable ids, bool considerSpecialTokens) ValueStringBuilder sb = new ValueStringBuilder(); - bool decodeUnknownToken = _unknownTokenId.HasValue && considerSpecialTokens; - - if (decodeUnknownToken) + foreach (int id in ids) { - foreach (int id in ids) + if (_specialTokensReverse?.TryGetValue(id, out string? 
token) is true) { - if (MapIdToToken(id) is string s) + if (considerSpecialTokens) { - sb.Append(s); + sb.Append(token); } + continue; } - } - else - { - foreach (int id in ids) + + if (id == _unknownTokenId) { - if (id == _unknownTokenId) + if (considerSpecialTokens) { - continue; + Debug.Assert(UnknownToken is not null); + sb.Append(UnknownToken); } + continue; + } - if (MapIdToToken(id) is string s) - { - sb.Append(s); - } + if (MapIdToToken(id) is string s) + { + sb.Append(s); } } diff --git a/src/Microsoft.ML.Tokenizers/Model/BpeOptions.cs b/src/Microsoft.ML.Tokenizers/Model/BpeOptions.cs index 94c3e0913b..8eee50ac66 100644 --- a/src/Microsoft.ML.Tokenizers/Model/BpeOptions.cs +++ b/src/Microsoft.ML.Tokenizers/Model/BpeOptions.cs @@ -4,6 +4,8 @@ using System; using System.Collections.Generic; +using System.IO; +using System.Text.Json; namespace Microsoft.ML.Tokenizers { @@ -15,7 +17,9 @@ public sealed class BpeOptions /// /// Initializes a new instance of the class. /// - public BpeOptions(IEnumerable<(string Token, int Id)> vocabulary) + /// The vocabulary to use. + /// Thrown when is null. + public BpeOptions(IEnumerable> vocabulary) { if (vocabulary == null) { @@ -25,10 +29,74 @@ public BpeOptions(IEnumerable<(string Token, int Id)> vocabulary) Vocabulary = vocabulary; } + /// + /// Initializes a new instance of the class. + /// + /// The JSON file path containing the dictionary of string keys and their ids. + /// The file path containing the tokens's pairs list. + public BpeOptions(string vocabFile, string? mergesFile = null) + { + if (vocabFile is null) + { + throw new ArgumentNullException(nameof(vocabFile)); + } + + if (!File.Exists(vocabFile)) + { + throw new ArgumentException($"Could not find the vocabulary file '{vocabFile}'."); + } + + using Stream vocabStream = File.OpenRead(vocabFile); + Dictionary? 
dictionary = JsonSerializer.Deserialize>(vocabStream); + + if (dictionary is null) + { + throw new InvalidOperationException($"The content of the vocabulary file '{vocabFile}' is not valid."); + } + + Vocabulary = dictionary; + + if (mergesFile is not null) + { + if (!File.Exists(mergesFile)) + { + throw new ArgumentException($"Could not find the merges file '{mergesFile}'."); + } + + using Stream mergesStream = File.OpenRead(mergesFile); + using StreamReader reader = new(mergesStream); + + List merges = new(); + + int lineNumber = 0; + string? line; + + while ((line = reader.ReadLine()) is not null) + { + lineNumber++; + if (line.StartsWith("#version", StringComparison.Ordinal) || line.Length == 0) + { + continue; + } + + // validate the merges format + int index = line.IndexOf(' '); + if (index < 0 || index == line.Length - 1 || line.IndexOf(' ', index + 1) >= 0) + { + throw new InvalidOperationException($"Invalid merge file format at line: {lineNumber}"); + } + + merges.Add(line); + } + + Merges = merges; + } + } + /// /// Gets or sets the vocabulary to use. /// - public IEnumerable<(string Token, int Id)> Vocabulary { get; } + public IEnumerable> Vocabulary { get; } /// /// Gets or sets the list of the merge strings used to merge tokens during encoding. @@ -38,7 +106,7 @@ public BpeOptions(IEnumerable<(string Token, int Id)> vocabulary) /// /// Gets or sets the optional special tokens to use. /// - public Dictionary? SpecialTokens { get; set; } + public IReadOnlyDictionary? SpecialTokens { get; set; } /// /// Gets or sets the optional normalizer to normalize the input text before encoding it. 
diff --git a/src/Microsoft.ML.Tokenizers/Model/SentencePieceTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/SentencePieceTokenizer.cs index f41516e270..cb945d24fa 100644 --- a/src/Microsoft.ML.Tokenizers/Model/SentencePieceTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/SentencePieceTokenizer.cs @@ -436,7 +436,7 @@ public OperationStatus Decode(IEnumerable ids, Span destination, bool /// https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto. /// /// The stream containing the SentencePiece Bpe or Unigram model. - /// Indicate emitting the beginning of sentence token during the encoding. + /// Indicate emitting the beginning of sentence token during the encoding. /// Indicate emitting the end of sentence token during the encoding. /// The additional tokens to add to the vocabulary. /// @@ -444,7 +444,7 @@ public OperationStatus Decode(IEnumerable ids, Span destination, bool /// public static SentencePieceTokenizer Create( Stream modelStream, - bool addBeginOfSentence = true, + bool addBeginningOfSentence = true, bool addEndOfSentence = false, IReadOnlyDictionary? 
specialTokens = null) { @@ -455,7 +455,7 @@ public static SentencePieceTokenizer Create( throw new ArgumentNullException(nameof(modelProto)); } - return new SentencePieceTokenizer(modelProto, addBeginOfSentence, addEndOfSentence, specialTokens); + return new SentencePieceTokenizer(modelProto, addBeginningOfSentence, addEndOfSentence, specialTokens); } } } diff --git a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs index 8fb73a5225..0b9e64cec9 100644 --- a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs @@ -1013,6 +1013,15 @@ public override OperationStatus Decode(IEnumerable ids, Span destinat private const string IMStart = "<|im_start|>"; private const string IMEnd = "<|im_end|>"; private const string IMSep = "<|im_sep|>"; + private const string StartOfText = "<|startoftext|>"; + private const string Return = "<|return|>"; + private const string Constrain = "<|constrain|>"; + private const string Channel = "<|channel|>"; + private const string Start = "<|start|>"; + private const string End = "<|end|>"; + private const string Message = "<|message|>"; + private const string Call = "<|call|>"; + private const string ReservedPrefix = "<|reserved_"; private enum ModelEncoding { @@ -1022,37 +1031,69 @@ private enum ModelEncoding P50kEdit, R50kBase, GPT2, - O200kBase + O200kBase, + O200kHarmony } private const string Phi4ModelName = "phi-4"; private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixToEncoding = [ - // chat ( "o1-", ModelEncoding.O200kBase ), // e.g. o1-mini ( "o3-", ModelEncoding.O200kBase ), // e.g. o3-mini + ( "o4-mini-", ModelEncoding.O200kBase ), // e.g. 
o4-mini + + // chat + ( "gpt-5-", ModelEncoding.O200kBase), + ( "gpt-4.1-", ModelEncoding.O200kBase), // e.g., gpt-4.1-mini + ( "gpt-4.5-", ModelEncoding.O200kBase), // e.g., gpt-4.5 ( "gpt-4o-", ModelEncoding.O200kBase), // e.g., gpt-4o-2024-05-13 + ( "chatgpt-4o-", ModelEncoding.O200kBase), ( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k ( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g, gpt-3.5-turbo-0301, -0401, etc. - ( "gpt-35-", ModelEncoding.Cl100kBase ) // Azure deployment name + ( "gpt-35-", ModelEncoding.Cl100kBase ), // Azure deployment name + ( "gpt-oss-", ModelEncoding.O200kHarmony ), + + // fine-tuned + ( "ft:gpt-4o", ModelEncoding.O200kBase ), + ( "ft:gpt-4", ModelEncoding.Cl100kBase ), + ( "ft:gpt-3.5-turbo", ModelEncoding.Cl100kBase ), + ( "ft:davinci-002", ModelEncoding.Cl100kBase ), + ( "ft:babbage-002", ModelEncoding.Cl100kBase ), ]; private static readonly Dictionary _modelToEncoding = new Dictionary(StringComparer.OrdinalIgnoreCase) { - // chat - { "gpt-4o", ModelEncoding.O200kBase }, + // reasoning { "o1", ModelEncoding.O200kBase }, { "o3", ModelEncoding.O200kBase }, + { "o4-mini", ModelEncoding.O200kBase }, + + // chat + { "gpt-5", ModelEncoding.O200kBase }, + { "gpt-4.1", ModelEncoding.O200kBase }, + { "gpt-4o", ModelEncoding.O200kBase }, { "gpt-4", ModelEncoding.Cl100kBase }, { "gpt-3.5-turbo", ModelEncoding.Cl100kBase }, + { "gpt-3.5", ModelEncoding.Cl100kBase }, { "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase }, { "gpt-35", ModelEncoding.Cl100kBase }, // Azure deployment name { "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name { "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name - // text + // Base + { "davinci-002", ModelEncoding.Cl100kBase }, + { "babbage-002", ModelEncoding.Cl100kBase }, + + // embeddings + // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings + { "text-embedding-ada-002", ModelEncoding.Cl100kBase }, + { 
"text-embedding-3-small", ModelEncoding.Cl100kBase }, + { "text-embedding-3-large", ModelEncoding.Cl100kBase }, + + // DEPRECATED MODELS + // text (DEPRECATED) { "text-davinci-003", ModelEncoding.P50kBase }, { "text-davinci-002", ModelEncoding.P50kBase }, { "text-davinci-001", ModelEncoding.R50kBase }, @@ -1064,7 +1105,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "babbage", ModelEncoding.R50kBase }, { "ada", ModelEncoding.R50kBase }, - // code + // code (DEPRECATED) { "code-davinci-002", ModelEncoding.P50kBase }, { "code-davinci-001", ModelEncoding.P50kBase }, { "code-cushman-002", ModelEncoding.P50kBase }, @@ -1072,17 +1113,12 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "davinci-codex", ModelEncoding.P50kBase }, { "cushman-codex", ModelEncoding.P50kBase }, - // edit + // edit (DEPRECATED) { "text-davinci-edit-001", ModelEncoding.P50kEdit }, { "code-davinci-edit-001", ModelEncoding.P50kEdit }, - // embeddings - // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings - { "text-embedding-ada-002", ModelEncoding.Cl100kBase }, - { "text-embedding-3-small", ModelEncoding.Cl100kBase }, - { "text-embedding-3-large", ModelEncoding.Cl100kBase }, - // old embeddings + // old embeddings (DEPRECATED) { "text-similarity-davinci-001", ModelEncoding.R50kBase }, { "text-similarity-curie-001", ModelEncoding.R50kBase }, { "text-similarity-babbage-001", ModelEncoding.R50kBase }, @@ -1096,6 +1132,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo // open source { "gpt2", ModelEncoding.GPT2 }, + { "gpt-2", ModelEncoding.GPT2 }, // phi-4 { Phi4ModelName, ModelEncoding.Cl100kBase }, @@ -1123,6 +1160,32 @@ private static ModelEncoding GetModelEncoding(string modelName) return encoder; } + private static Dictionary CreateHarmonyEncodingSpecialTokens() => + new Dictionary + { + { StartOfText, 199998 }, + { EndOfText, 199999 }, + { 
$"{ReservedPrefix}200000|>", 200000 }, + { $"{ReservedPrefix}200001|>", 200001 }, + { Return, 200002 }, + { Constrain, 200003 }, + { $"{ReservedPrefix}200004|>", 200004 }, + { Channel, 200005 }, + { Start, 200006 }, + { End, 200007 }, + { Message, 200008 }, + { $"{ReservedPrefix}200009|>", 200009 }, + { $"{ReservedPrefix}200010|>", 200010 }, + { $"{ReservedPrefix}200011|>", 200011 }, + { Call, 200012 }, + { $"{ReservedPrefix}200013|>", 200013 }, + { $"{ReservedPrefix}200014|>", 200014 }, + { $"{ReservedPrefix}200015|>", 200015 }, + { $"{ReservedPrefix}200016|>", 200016 }, + { $"{ReservedPrefix}200017|>", 200017 }, + { EndOfPrompt, 200018 }, + }; + private static (Dictionary SpecialTokens, Regex Regex, string VocabFile, Type? DataType, string PackageName) GetTiktokenConfigurations(string modelName) => GetTiktokenConfigurations(GetModelEncoding(modelName), modelName); private static (Dictionary SpecialTokens, Regex Regex, string VocabFile, Type? DataType, string PackageName) GetTiktokenConfigurations(ModelEncoding modelEncoding, string? modelName = null) @@ -1154,6 +1217,9 @@ private static (Dictionary SpecialTokens, Regex Regex, string Vocab case ModelEncoding.R50kBase: return (new Dictionary { { EndOfText, 50256 } }, P50kBaseRegex(), R50RanksFile, Type.GetType(R50kBaseTypeName), R50kBasePackageName); + case ModelEncoding.O200kHarmony: + return (CreateHarmonyEncodingSpecialTokens(), O200kBaseRegex(), O200kBaseFile, Type.GetType(O200kBaseTypeName), O200kBasePackageName); + default: throw new NotSupportedException($"The model '{modelName ?? 
modelEncoding.ToString()}' is not supported."); } @@ -1176,6 +1242,7 @@ private static (Dictionary SpecialTokens, Regex Regex, string Vocab internal const string P50kEditEncodingName = "p50k_edit"; internal const string R50kBaseEncodingName = "r50k_base"; internal const string O200kBaseEncodingName = "o200k_base"; + internal const string O200kHarmonyEncodingName = "o200k_harmony"; internal const string Cl100kBasePackageName = "Microsoft.ML.Tokenizers.Data.Cl100kBase"; internal const string Gpt2PackageName = "Microsoft.ML.Tokenizers.Data.Gpt2"; @@ -1471,6 +1538,10 @@ public static TiktokenTokenizer CreateForEncoding(string encodingName, IReadOnly { modelEncoding = ModelEncoding.O200kBase; } + else if (encodingName.Equals(O200kHarmonyEncodingName, StringComparison.OrdinalIgnoreCase)) + { + modelEncoding = ModelEncoding.O200kHarmony; + } else if (encodingName.Equals(P50kBaseEncodingName, StringComparison.OrdinalIgnoreCase)) { modelEncoding = ModelEncoding.P50kBase; diff --git a/src/Microsoft.ML.Tokenizers/PreTokenizer/CompositePreTokenizer.cs b/src/Microsoft.ML.Tokenizers/PreTokenizer/CompositePreTokenizer.cs index 5e1422bfab..5081296098 100644 --- a/src/Microsoft.ML.Tokenizers/PreTokenizer/CompositePreTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/PreTokenizer/CompositePreTokenizer.cs @@ -10,6 +10,8 @@ using System.Linq; using System.Text.RegularExpressions; +namespace Microsoft.ML.Tokenizers; + /// /// CompositePreTokenizer is a pre-tokenizer that applies multiple pre-tokenizers in sequence. /// diff --git a/src/Microsoft.ML.Tokenizers/Utils/DoubleArrayTrie.cs b/src/Microsoft.ML.Tokenizers/Utils/DoubleArrayTrie.cs index 8e1fafbd74..f0465e315d 100644 --- a/src/Microsoft.ML.Tokenizers/Utils/DoubleArrayTrie.cs +++ b/src/Microsoft.ML.Tokenizers/Utils/DoubleArrayTrie.cs @@ -20,7 +20,7 @@ namespace Microsoft.ML.Tokenizers // // Succinct bit vector. 
// - public class BitVector + internal class BitVector { private const int UnitSize = sizeof(uint) * 8; private readonly List _units = new(); @@ -279,7 +279,7 @@ internal struct DawgUnit // Directed Acyclic Word Graph (DAWG) builder. // - public class DawgBuilder + internal class DawgBuilder { private const int InitialTableSize = 1 << 10; diff --git a/src/Microsoft.ML.TorchSharp/Microsoft.ML.TorchSharp.csproj b/src/Microsoft.ML.TorchSharp/Microsoft.ML.TorchSharp.csproj index c347333d27..9d26ec2fdd 100644 --- a/src/Microsoft.ML.TorchSharp/Microsoft.ML.TorchSharp.csproj +++ b/src/Microsoft.ML.TorchSharp/Microsoft.ML.TorchSharp.csproj @@ -14,12 +14,11 @@ - - - - - - + + + + + diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 7bbff58793..5d65724380 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -51,7 +51,7 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate new[] { new OneHotEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality) }, keyData); /// - /// Create a , which converts one or more input text columns specified in + /// Creates a , which converts one or more input text columns specified in /// into as many columns of one-hot encoded vectors. /// /// If multiple columns are passed to the estimator, all of the columns will be processed in a single pass over the data. 
diff --git a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj index 09fb537c0e..97fe2ea6a9 100644 --- a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj +++ b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj @@ -45,7 +45,7 @@ - + diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index ffd2bbf8d9..2e714176be 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -667,7 +667,7 @@ private static ImmutableDictionary if (model is ITransformerChainAccessor chain) { - foreach (var transformer in chain.Transformers.Reverse()) + foreach (var transformer in ((IEnumerable)chain.Transformers).Reverse()) { if (transformer is ISingleFeaturePredictionTransformer singlePredictionTransformer) { diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index a3a90b0673..74ec426e9a 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -32,8 +32,8 @@ namespace Microsoft.ML.Transforms.Text public interface IStopWordsRemoverOptions { } /// - /// An estimator that turns a collection of text documents into numerical feature vectors. - /// The feature vectors are normalized counts of word and/or character n-grams (based on the options supplied). + /// Represents an estimator that turns a collection of text documents into numerical feature vectors. + /// The feature vectors are normalized counts of word or character n-grams (based on the options supplied). /// /// /// , where the estimator can be further tuned. /// - /// Check the See Also section for links to usage examples. + /// For links to usage examples, see and . 
/// ]]> /// /// @@ -67,7 +67,7 @@ public interface IStopWordsRemoverOptions { } public sealed class TextFeaturizingEstimator : IEstimator { /// - /// Text language. This enumeration is serialized. + /// Specifies text languages. This enumeration is serialized. /// public enum Language { @@ -81,7 +81,7 @@ public enum Language } /// - /// Text vector normalizer kind. + /// Specifies the kinds of text vector normalizers. /// public enum NormFunction { @@ -121,7 +121,7 @@ internal bool TryUnparse(StringBuilder sb) } /// - /// Advanced options for the . + /// Provides advanced options for the . /// public sealed class Options : TransformInputBase { @@ -140,18 +140,20 @@ public sealed class Options : TransformInputBase private IStopWordsRemoverOptions _stopWordsRemoverOptions; /// - /// Option to set type of stop word remover to use. + /// Gets or sets the type of stop word remover to use. + /// + /// /// The following options are available /// /// - /// The removes the language specific list of stop words from the input. + /// The removes the language-specific list of stop words from the input. /// /// - /// The uses user provided list of stop words. + /// The uses a user-provided list of stop words. /// /// /// Setting this to 'null' does not remove stop words from the input. - /// + /// public IStopWordsRemoverOptions StopWordsRemoverOptions { get { return _stopWordsRemoverOptions; } @@ -208,15 +210,17 @@ public IStopWordsRemoverOptions StopWordsRemoverOptions private WordBagEstimator.Options _wordFeatureExtractor; /// - /// Norm of the output vector. It will be normalized to one. + /// Gets the norm of the output vector. It will be normalized to one. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Normalize vectors (rows) individually by rescaling them to unit norm.", Name = "VectorNormalizer", ShortName = "norm", SortOrder = 13)] public NormFunction Norm = NormFunction.L2; /// - /// Ngram feature extractor to use for words (WordBag/WordHashBag). 
- /// Set to to turn off n-gram generation for words. + /// Gets or sets the n-gram feature extractor to use for words (WordBag/WordHashBag). /// + /// + /// Set to to turn off n-gram generation for words. + /// public WordBagEstimator.Options WordFeatureExtractor { get { return _wordFeatureExtractor; } @@ -247,9 +251,11 @@ public WordBagEstimator.Options WordFeatureExtractor private WordBagEstimator.Options _charFeatureExtractor; /// - /// Ngram feature extractor to use for characters (WordBag/WordHashBag). - /// Set to to turn off n-gram generation for characters. + /// Gets or sets the n-gram feature extractor to use for characters (WordBag/WordHashBag). /// + /// + /// Set to to turn off n-gram generation for characters. + /// public WordBagEstimator.Options CharFeatureExtractor { get { return _charFeatureExtractor; } @@ -625,9 +631,11 @@ private static string GenerateColumnName(DataViewSchema schema, string srcName, } /// - /// Returns the of the schema which will be produced by the transformer. - /// Used for schema propagation and verification in a pipeline. + /// Returns the of the schema that will be produced by the transformer. /// + /// + /// This method is used for schema propagation and verification in a pipeline. 
+ /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); diff --git a/src/Microsoft.ML.Vision/DnnRetrainTransform.cs b/src/Microsoft.ML.Vision/DnnRetrainTransform.cs index d172633057..b0fce6146c 100644 --- a/src/Microsoft.ML.Vision/DnnRetrainTransform.cs +++ b/src/Microsoft.ML.Vision/DnnRetrainTransform.cs @@ -15,7 +15,6 @@ using Microsoft.ML.Runtime; using Microsoft.ML.TensorFlow; using Microsoft.ML.Transforms; -using NumSharp; using Tensorflow; using static Microsoft.ML.TensorFlow.TensorFlowUtils; using static Tensorflow.Binding; @@ -50,7 +49,7 @@ internal sealed class DnnRetrainTransformer : RowToRowTransformerBase, IDisposab private readonly DataViewType[] _outputTypes; private readonly TF_DataType[] _tfOutputTypes; private readonly TF_DataType[] _tfInputTypes; - private readonly TensorShape[] _tfInputShapes; + private readonly Shape[] _tfInputShapes; private readonly (Operation, int)[] _tfInputOperations; private readonly (Operation, int)[] _tfOutputOperations; private readonly TF_Output[] _tfInputNodes; @@ -225,7 +224,7 @@ private void CheckTrainingParameters(DnnRetrainEstimator.Options options) } } - private (int, bool, TF_DataType, TensorShape) GetTrainingInputInfo(DataViewSchema inputSchema, string columnName, string tfNodeName, int batchSize) + private (int, bool, TF_DataType, Shape) GetTrainingInputInfo(DataViewSchema inputSchema, string columnName, string tfNodeName, int batchSize) { if (!inputSchema.TryGetColumnIndex(columnName, out int inputColIndex)) throw Host.Except($"Column {columnName} doesn't exist"); @@ -237,7 +236,7 @@ private void CheckTrainingParameters(DnnRetrainEstimator.Options options) var tfInput = new TF_Input(inputTensor, index); var tfInputType = inputTensor.OpType == "Placeholder" ? 
inputTensor.OutputType(index) : inputTensor.InputType(index); - var tfInputShape = ((Tensor)inputTensor).TensorShape; + var tfInputShape = ((Tensor)inputTensor).shape; var numInputDims = tfInputShape != null ? tfInputShape.ndim : -1; if (isInputVector && (tfInputShape == null || (numInputDims == 0))) @@ -248,17 +247,17 @@ private void CheckTrainingParameters(DnnRetrainEstimator.Options options) for (int indexLocal = 0; indexLocal < vecType.Dimensions.Length; indexLocal += 1) colTypeDims[indexLocal + 1] = vecType.Dimensions[indexLocal]; - tfInputShape = new TensorShape(colTypeDims); + tfInputShape = new Shape(colTypeDims); } if (numInputDims != -1) { - var newShape = new int[numInputDims]; + var newShape = new long[numInputDims]; var dims = tfInputShape.dims; newShape[0] = dims[0] == 0 || dims[0] == -1 ? batchSize : dims[0]; for (int j = 1; j < numInputDims; j++) newShape[j] = dims[j]; - tfInputShape = new TensorShape(newShape); + tfInputShape = new Shape(newShape); } var expectedType = Tf2MlNetType(tfInputType); @@ -278,7 +277,7 @@ private void TrainCore(DnnRetrainEstimator.Options options, IDataView input, IDa var inputColIndices = new int[inputsForTraining.Length]; var isInputVector = new bool[inputsForTraining.Length]; var tfInputTypes = new TF_DataType[inputsForTraining.Length]; - var tfInputShapes = new TensorShape[inputsForTraining.Length]; + var tfInputShapes = new Shape[inputsForTraining.Length]; for (int i = 0; i < _inputs.Length; i++) inputsForTraining[i] = _idvToTfMapping[_inputs[i]]; @@ -382,13 +381,13 @@ private void TrainCore(DnnRetrainEstimator.Options options, IDataView input, IDa runner.AddInput(srcTensorGetters[i].GetBufferedBatchTensor(), i + 1); Tensor[] tensor = runner.Run(); - if (tensor.Length > 0 && tensor[0] != IntPtr.Zero) + if (tensor.Length > 0 && tensor[0].TensorDataPointer != IntPtr.Zero) { tensor[0].ToScalar(ref loss); tensor[0].Dispose(); } - if (tensor.Length > 1 && tensor[1] != IntPtr.Zero) + if (tensor.Length > 1 && 
tensor[1].TensorDataPointer != IntPtr.Zero) { tensor[1].ToScalar(ref metric); tensor[1].Dispose(); @@ -460,14 +459,14 @@ private void UpdateModelOnDisk(string modelDir, DnnRetrainEstimator.Options opti } } - private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, bool isVector, int colIndex, TensorShape tfShape, bool keyType = false) + private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, bool isVector, int colIndex, Shape tfShape, bool keyType = false) { if (isVector) return new TensorValueGetterVec(input, colIndex, tfShape); return new TensorValueGetter(input, colIndex, tfShape, keyType); } - private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, TF_DataType tfType, bool isVector, int colIndex, TensorShape tfShape) + private static ITensorValueGetter CreateTensorValueGetter(DataViewRow input, TF_DataType tfType, bool isVector, int colIndex, Shape tfShape) { var type = Tf2MlNetType(tfType); if (input.Schema[colIndex].Type is KeyDataViewType && type.RawType == typeof(Int64)) @@ -481,7 +480,7 @@ private static ITensorValueGetter[] GetTensorValueGetters( int[] inputColIndices, bool[] isInputVector, TF_DataType[] tfInputTypes, - TensorShape[] tfInputShapes) + Shape[] tfInputShapes) { var srcTensorGetters = new ITensorValueGetter[inputColIndices.Length]; for (int i = 0; i < inputColIndices.Length; i++) @@ -574,10 +573,10 @@ private static (Operation, int) GetOperationFromName(string operation, Session s return (session.graph.OperationByName(operation), 0); } - internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Operation, int)[]) GetInputInfo(IHost host, Session session, string[] inputs, int batchSize = 1) + internal static (TF_DataType[] tfInputTypes, Shape[] tfInputShapes, (Operation, int)[]) GetInputInfo(IHost host, Session session, string[] inputs, int batchSize = 1) { var tfInputTypes = new TF_DataType[inputs.Length]; - var tfInputShapes = new TensorShape[inputs.Length]; + var 
tfInputShapes = new Shape[inputs.Length]; var tfInputOperations = new (Operation, int)[inputs.Length]; int index = 0; @@ -594,7 +593,7 @@ internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Opera throw host.ExceptParam(nameof(session), $"Input type '{tfInputType}' of input column '{input}' is not supported in TensorFlow"); tfInputTypes[index] = tfInputType; - tfInputShapes[index] = ((Tensor)inputTensor).TensorShape; + tfInputShapes[index] = ((Tensor)inputTensor).shape; tfInputOperations[index] = (inputTensor, inputTensorIndex); index++; } @@ -602,7 +601,7 @@ internal static (TF_DataType[] tfInputTypes, TensorShape[] tfInputShapes, (Opera return (tfInputTypes, tfInputShapes, tfInputOperations); } - internal static TensorShape GetTensorShape(TF_Output output, Graph graph, Status status = null) + internal static Shape GetTensorShape(TF_Output output, Graph graph, Status status = null) { if (graph == IntPtr.Zero) throw new ObjectDisposedException(nameof(graph)); @@ -613,12 +612,12 @@ internal static TensorShape GetTensorShape(TF_Output output, Graph graph, Status cstatus.Check(); if (n == -1) - return new TensorShape(new int[0]); + return new Shape(new int[0]); var dims = new long[n]; c_api.TF_GraphGetTensorShape(graph, output, dims, dims.Length, cstatus.Handle); cstatus.Check(); - return new TensorShape(dims.Select(x => (int)x).ToArray()); + return new Shape(dims.Select(x => (int)x).ToArray()); } internal static (TF_DataType[] tfOutputTypes, DataViewType[] outputTypes, (Operation, int)[]) GetOutputInfo(IHost host, Session session, string[] outputs) @@ -645,12 +644,12 @@ internal static (TF_DataType[] tfOutputTypes, DataViewType[] outputTypes, (Opera // i.e. the first dimension (if unknown) is assumed to be batch dimension. // If there are other dimension that are unknown the transformer will return a variable length vector. // This is the work around in absence of reshape transformer. - int[] dims = shape.ndim > 0 ? 
shape.dims.Skip(shape.dims[0] == -1 ? 1 : 0).ToArray() : new[] { 0 }; + int[] dims = shape.ndim > 0 ? shape.dims.Skip(shape.dims[0] == -1 ? 1 : 0).Select(x => checked((int)x)).ToArray() : new int[] { 0 }; for (int j = 0; j < dims.Length; j++) dims[j] = dims[j] == -1 ? 0 : dims[j]; if (dims == null || dims.Length == 0) { - dims = new[] { 1 }; + dims = new int[] { 1 }; outputTypes[i] = Tf2MlNetType(tfOutputType); } else @@ -741,7 +740,7 @@ public void Dispose() { if (_session.graph != null) _session.graph.Dispose(); - _session.close(); + _session.Dispose(); } } finally @@ -760,7 +759,7 @@ private sealed class Mapper : MapperBase private readonly DnnRetrainTransformer _parent; private readonly int[] _inputColIndices; private readonly bool[] _isInputVector; - private readonly TensorShape[] _fullySpecifiedShapes; + private readonly Shape[] _fullySpecifiedShapes; private readonly ConcurrentBag _runners; public Mapper(DnnRetrainTransformer parent, DataViewSchema inputSchema) : @@ -770,7 +769,7 @@ public Mapper(DnnRetrainTransformer parent, DataViewSchema inputSchema) : _parent = parent; _inputColIndices = new int[_parent._inputs.Length]; _isInputVector = new bool[_parent._inputs.Length]; - _fullySpecifiedShapes = new TensorShape[_parent._inputs.Length]; + _fullySpecifiedShapes = new Shape[_parent._inputs.Length]; for (int i = 0; i < _parent._inputs.Length; i++) { if (!inputSchema.TryGetColumnIndex(_parent._inputs[i], out _inputColIndices[i])) @@ -792,12 +791,12 @@ public Mapper(DnnRetrainTransformer parent, DataViewSchema inputSchema) : var colTypeDims = vecType.Dimensions.Select(dim => (int)dim).ToArray(); if (shape == null || (shape.Length == 0)) - _fullySpecifiedShapes[i] = new TensorShape(colTypeDims); + _fullySpecifiedShapes[i] = new Shape(colTypeDims); else { // If the column is one dimension we make sure that the total size of the TF shape matches. // Compute the total size of the known dimensions of the shape. 
- int valCount = 1; + long valCount = 1; int numOfUnkDim = 0; foreach (var s in shape) { @@ -821,19 +820,19 @@ public Mapper(DnnRetrainTransformer parent, DataViewSchema inputSchema) : // Fill in the unknown dimensions. var originalShapeDims = originalShape.dims; var originalShapeNdim = originalShape.ndim; - var l = new int[originalShapeNdim]; + var l = new long[originalShapeNdim]; for (int ishape = 0; ishape < originalShapeNdim; ishape++) l[ishape] = originalShapeDims[ishape] == -1 ? (int)d : originalShapeDims[ishape]; - _fullySpecifiedShapes[i] = new TensorShape(l); + _fullySpecifiedShapes[i] = new Shape(l); } if (_parent._addBatchDimensionInput) { - var l = new int[_fullySpecifiedShapes[i].ndim + 1]; + var l = new long[_fullySpecifiedShapes[i].ndim + 1]; l[0] = 1; for (int ishape = 1; ishape < l.Length; ishape++) l[ishape] = _fullySpecifiedShapes[i].dims[ishape - 1]; - _fullySpecifiedShapes[i] = new TensorShape(l); + _fullySpecifiedShapes[i] = new Shape(l); } } @@ -891,7 +890,7 @@ private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[ UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache); var tensor = outputCache.Outputs[_parent._outputs[iinfo]]; - var tensorSize = tensor.TensorShape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); + var tensorSize = tensor.shape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); var editor = VBufferEditor.Create(ref dst, (int)tensorSize); FetchStringData(tensor, editor.Values); @@ -906,7 +905,7 @@ private Delegate MakeGetter(DataViewRow input, int iinfo, ITensorValueGetter[ UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache); var tensor = outputCache.Outputs[_parent._outputs[iinfo]]; - var tensorSize = tensor.TensorShape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); + var tensorSize = tensor.shape.dims.Where(x => x > 0).Aggregate((x, y) => x * y); var editor = VBufferEditor.Create(ref dst, (int)tensorSize); @@ -972,12 +971,12 
@@ private class TensorValueGetter : ITensorValueGetter private readonly ValueGetter _srcgetter; private readonly T[] _bufferedData; private readonly Int64[] _bufferedDataLong; - private readonly TensorShape _tfShape; + private readonly Shape _tfShape; private int _position; private readonly bool _keyType; private readonly long[] _dims; - public TensorValueGetter(DataViewRow input, int colIndex, TensorShape tfShape, bool keyType = false) + public TensorValueGetter(DataViewRow input, int colIndex, Shape tfShape, bool keyType = false) { _srcgetter = input.GetGetter(input.Schema[colIndex]); _tfShape = tfShape; @@ -1035,7 +1034,7 @@ public Tensor GetBufferedBatchTensor() { if (_keyType) { - var tensor = new Tensor(_bufferedDataLong, _dims, TF_DataType.TF_INT64); + var tensor = new Tensor(_bufferedDataLong, _dims); _position = 0; return tensor; } @@ -1051,7 +1050,7 @@ public Tensor GetBufferedBatchTensor() private class TensorValueGetterVec : ITensorValueGetter { private readonly ValueGetter> _srcgetter; - private readonly TensorShape _tfShape; + private readonly Shape _tfShape; private VBuffer _vBuffer; private T[] _denseData; private T[] _bufferedData; @@ -1059,7 +1058,7 @@ private class TensorValueGetterVec : ITensorValueGetter private readonly long[] _dims; private readonly long _bufferedDataSize; - public TensorValueGetterVec(DataViewRow input, int colIndex, TensorShape tfShape) + public TensorValueGetterVec(DataViewRow input, int colIndex, Shape tfShape) { _srcgetter = input.GetGetter>(input.Schema[colIndex]); _tfShape = tfShape; diff --git a/src/Microsoft.ML.Vision/ImageClassificationTrainer.cs b/src/Microsoft.ML.Vision/ImageClassificationTrainer.cs index 846de00518..9e6e2985e0 100644 --- a/src/Microsoft.ML.Vision/ImageClassificationTrainer.cs +++ b/src/Microsoft.ML.Vision/ImageClassificationTrainer.cs @@ -763,23 +763,7 @@ private void CheckTrainingParameters(Options options) private static Tensor EncodeByteAsString(VBuffer buffer) { - int length = buffer.Length; 
- var size = c_api.TF_StringEncodedSize((ulong)length); - var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, Array.Empty(), 0, ((ulong)size + 8)); - - IntPtr tensor = c_api.TF_TensorData(handle); - Marshal.WriteInt64(tensor, 0); - - var status = new Status(); - unsafe - { - fixed (byte* src = buffer.GetValues()) - c_api.TF_StringEncode(src, (ulong)length, (byte*)(tensor + sizeof(Int64)), size, status.Handle); - } - - status.Check(true); - status.Dispose(); - return new Tensor(handle); + return StringTensorFactory.CreateStringTensor(buffer.DenseValues().ToArray()); } internal sealed class ImageProcessor @@ -976,8 +960,8 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath, metrics.Train = new TrainMetrics(); float accuracy = 0; float crossentropy = 0; - var labelTensorShape = _labelTensor.TensorShape.dims.Select(x => (long)x).ToArray(); - var featureTensorShape = _bottleneckInput.TensorShape.dims.Select(x => (long)x).ToArray(); + var labelTensorShape = _labelTensor.shape.dims.Select(x => (long)x).ToArray(); + var featureTensorShape = _bottleneckInput.shape.dims.Select(x => (long)x).ToArray(); byte[] buffer = new byte[sizeof(int)]; trainSetFeatureReader.ReadExactly(buffer, 0, 4); int trainingExamples = BitConverter.ToInt32(buffer, 0); @@ -1119,12 +1103,12 @@ private void TrainAndEvaluateClassificationLayerCore(int epoch, float learningRa { // Add learning rate as a placeholder only when learning rate scheduling is used. 
metrics.Train.LearningRate = learningRateScheduler.GetLearningRate(trainState); - runner.AddInput(new Tensor(metrics.Train.LearningRate, TF_DataType.TF_FLOAT), 2); + runner.AddInput(new Tensor(metrics.Train.LearningRate), 2); } - var outputTensors = runner.AddInput(new Tensor(featureBufferPtr, featureTensorShape, TF_DataType.TF_FLOAT, featuresFileBytesRead), 0) - .AddInput(new Tensor(labelBufferPtr, labelTensorShape, TF_DataType.TF_INT64, labelFileBytesRead), 1) - .Run(); + var outputTensors = runner.AddInput(new Tensor(featureBufferPtr, featureTensorShape, TF_DataType.TF_FLOAT), 0) + .AddInput(new Tensor(labelBufferPtr, labelTensorShape, TF_DataType.TF_INT64), 1) + .Run(); metrics.Train.BatchProcessedCount += 1; metricsAggregator(outputTensors, metrics); @@ -1186,7 +1170,7 @@ private void TryCleanupTemporaryWorkspace() { tf_with(tf.name_scope("correct_prediction"), delegate { - _prediction = tf.argmax(resultTensor, 1); + _prediction = tf.math.argmax(resultTensor, 1); correctPrediction = tf.equal(_prediction, groundTruthTensor); }); @@ -1240,7 +1224,7 @@ private void VariableSummaries(ResourceVariable var) string scoreColumnName, Tensor bottleneckTensor, bool isTraining, bool useLearningRateScheduler, float learningRate) { - var bottleneckTensorDims = bottleneckTensor.TensorShape.dims; + var bottleneckTensorDims = bottleneckTensor.shape.dims.Select(x => checked((int)x)).ToArray(); var (batch_size, bottleneck_tensor_size) = (bottleneckTensorDims[0], bottleneckTensorDims[1]); tf_with(tf.name_scope("input"), scope => { @@ -1254,7 +1238,7 @@ private void VariableSummaries(ResourceVariable var) _learningRateInput = tf.placeholder(tf.float32, null, name: "learningRateInputPlaceholder"); } - _labelTensor = tf.placeholder(tf.int64, new TensorShape(batch_size), name: labelColumn); + _labelTensor = tf.placeholder(tf.int64, new Shape(batch_size), name: labelColumn); }); string layerName = "final_retrain_ops"; @@ -1274,7 +1258,7 @@ private void 
VariableSummaries(ResourceVariable var) ResourceVariable layerBiases = null; tf_with(tf.name_scope("biases"), delegate { - TensorShape shape = new TensorShape(classCount); + Shape shape = new Shape(classCount); layerBiases = tf.Variable(tf.zeros(shape), name: "final_biases"); VariableSummaries(layerBiases); }); @@ -1514,10 +1498,94 @@ public void Dispose() if (_session != null && _session != IntPtr.Zero) { - _session.close(); + _session.Dispose(); } _isDisposed = true; } } + +#pragma warning disable MSML_GeneralName +#pragma warning disable MSML_ParameterLocalVarName +#pragma warning disable IDE0055 + public class StringTensorFactory + { + // Define TF_TString struct + [StructLayout(LayoutKind.Sequential)] + struct TF_TString + { + public IntPtr data; + public UIntPtr length; + public UIntPtr capacity; + public int memory_type; + } + + // Import TF_TString methods from TensorFlow C API + [DllImport("tensorflow", CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe void TF_StringInit(TF_TString* tstring); + + [DllImport("tensorflow", CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe void TF_StringCopy(TF_TString* dst, byte* src, UIntPtr size); + + [DllImport("tensorflow", CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe void TF_StringDealloc(TF_TString* tstring); + + private static readonly TF_Deallocator _deallocatorInstance = new StringTensorFactory.TF_Deallocator(Deallocator); + + // Delegate for TensorFlow deallocator + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + public delegate void TF_Deallocator(IntPtr data, UIntPtr length, IntPtr arg); + + // Deallocator function + public static void Deallocator(IntPtr data, UIntPtr length, IntPtr arg) + { + unsafe + { + TF_StringDealloc((TF_TString*)data); + } + Marshal.FreeHGlobal(data); + } + + public static Tensor CreateStringTensor(byte[] data) + { + int sizeOfTString = Marshal.SizeOf(); + + // Allocate memory for TF_TString + 
IntPtr tstringPtr = Marshal.AllocHGlobal(sizeOfTString); + unsafe + { + TF_TString* tstring = (TF_TString*)tstringPtr; + TF_StringInit(tstring); + + fixed (byte* src = data) + { + TF_StringCopy(tstring, src, (UIntPtr)data.Length); + } + } + + // Create a scalar tensor (rank 0, so no shape dims) + Tensor tensor = new Tensor(new SafeTensorHandle(TF_NewTensor( + TF_DataType.TF_STRING, + Array.Empty(), + 0, + tstringPtr, + (UIntPtr)sizeOfTString, + _deallocatorInstance, + IntPtr.Zero + ))); + + return tensor; + } + + [DllImport("tensorflow", CallingConvention = CallingConvention.Cdecl)] + private static extern IntPtr TF_NewTensor( + TF_DataType dtype, + long[] dims, int num_dims, + IntPtr data, UIntPtr len, + TF_Deallocator deallocator, + IntPtr deallocator_arg); + } +#pragma warning restore MSML_GeneralName +#pragma warning restore MSML_ParameterLocalVarName +#pragma warning restore IDE0055 } diff --git a/src/Microsoft.ML/Microsoft.ML.csproj b/src/Microsoft.ML/Microsoft.ML.csproj index d16a22cd74..2d0022e1ce 100644 --- a/src/Microsoft.ML/Microsoft.ML.csproj +++ b/src/Microsoft.ML/Microsoft.ML.csproj @@ -25,12 +25,11 @@ - - - - - - + + + + + diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt index 50e94feef6..0082d0dcd5 100644 --- a/src/Native/CMakeLists.txt +++ b/src/Native/CMakeLists.txt @@ -13,6 +13,9 @@ set(RESOURCES) # Include 'bin/obj' dir since it contains _version.h include_directories("$ENV{__IntermediatesDir}") +# Define path to native source link file +set(NATIVE_SOURCELINK_FILE_PATH "$ENV{__IntermediatesDir}/native.sourcelink.json") + if(WIN32) add_definitions(-DWIN32) add_definitions(-D_WIN32=1) @@ -22,11 +25,9 @@ if(WIN32) endif() add_compile_options($<$:-DDEBUG>) add_compile_options($<$:-DNDEBUG>) - add_compile_options($<$:-DNDEBUG>) add_compile_options($<$:/Od>) add_compile_options($<$:/MTd>) # /MT will static link the VC runtime library, so it doesn't need to be installed on the target machine add_compile_options($<$:/MT>) - 
add_compile_options($<$:/MT>) add_compile_options(/guard:cf) add_compile_options(/Zo) # make optimized builds debugging easier. /Zo is the newer documented flag. add_compile_options(/nologo) # Suppress Startup Banner @@ -41,6 +42,11 @@ if(WIN32) add_compile_options(/Zc:inline) add_compile_options(/fp:precise) add_compile_options(/EHsc) + add_compile_options(/Brepro) + add_compile_options(/d1nodatetime) + add_compile_options(/experimental:deterministic) + add_compile_options(/GL) + add_compile_options(/d2CastGuardFailureMode:fastfail) # From here below are warnings required to be explicitly enabled. add_compile_options(/w34242) @@ -62,8 +68,14 @@ if(WIN32) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /PDBCOMPRESS") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:1572864") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /guard:cf") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /guard:cf") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /guard:cf /Brepro") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /guard:cf /Brepro") + + # Enable native source link if the source link file exists + if(EXISTS ${NATIVE_SOURCELINK_FILE_PATH}) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /sourcelink:${NATIVE_SOURCELINK_FILE_PATH}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /sourcelink:${NATIVE_SOURCELINK_FILE_PATH}") + endif(EXISTS ${NATIVE_SOURCELINK_FILE_PATH}) # Debug build specific flags set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "/NOVCFEATURE") @@ -71,18 +83,12 @@ if(WIN32) set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /NODEFAULTLIB:vcompd.lib /DEFAULTLIB:vcomp.lib") # Release build specific flags - set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") - set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") - set(CMAKE_EXE_LINKER_FLAGS_RELEASE 
"${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") + set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF /LTCG") + set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF /LTCG") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF /LTCG") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib") - # RelWithDebInfo specific flags - set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG /OPT:REF /OPT:ICF") - set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG /OPT:REF /OPT:ICF") - set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /DEBUG /OPT:REF /OPT:ICF") - set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib") - set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib") list(APPEND RESOURCES $ENV{__IntermediatesDir}/NativeVersion.rc) else() add_compile_options(-Wno-unused-local-typedef) diff --git a/src/Native/Native.proj b/src/Native/Native.proj index 232e1047af..02a95495bf 100644 --- a/src/Native/Native.proj +++ b/src/Native/Native.proj @@ -48,6 +48,7 @@ true $(IntermediateOutputPath)_version.h + $(IntermediateOutputPath)native.sourcelink.json @@ -120,7 +121,7 @@ + DependsOnTargets="GenerateNativeVersionFile;GenerateNativeSourcelinkFile"> $(Configuration) $(TargetArchitecture) --mkllibpath $(NuGetPackageRoot)mlnetmkldeps\$(MlNetMklDepsVersion)\runtimes\$(PackageRid)\native @@ -248,6 +249,15 @@ + + + + + diff --git a/src/Native/build.cmd b/src/Native/build.cmd 
index af85898bac..8876c6ff2a 100644 --- a/src/Native/build.cmd +++ b/src/Native/build.cmd @@ -3,10 +3,12 @@ setlocal :: Store current script directory before %~dp0 gets affected by another process later. set __currentScriptDir=%~dp0 +set "__currentScriptDir=%__currentScriptDir:~0,-1%" + :SetupArgs :: Initialize the args that will be passed to cmake -set __rootDir=%__currentScriptDir%..\.. +set __rootDir=%__currentScriptDir%\..\.. set __artifactsDir=%__rootDir%\artifacts set __binDir=%__artifactsDir%\bin set __objDir=%__artifactsDir%\obj @@ -51,7 +53,9 @@ set "VSCMD_START_DIR=%__currentScriptDir%" call "%_VSCOMNTOOLS%\VsDevCmd.bat" :RunVCVars -if "%VisualStudioVersion%"=="17.0" ( +if "%VisualStudioVersion%"=="18.0" ( + goto :VS2026 +) else if "%VisualStudioVersion%"=="17.0" ( goto :VS2022 ) else if "%VisualStudioVersion%"=="16.0" ( goto :VS2019 @@ -67,6 +71,14 @@ echo Error: Visual Studio 2015, 2017, 2019, or 2022 required echo Please see https://github.com/dotnet/machinelearning/tree/main/Documentation for build instructions. 
exit /b 1 +:VS2026 +:: Setup vars for VS2026 +set __PlatformToolset=v145 +set __VSVersion=18 2026 +:: Set the environment for the native build +call "%VS180COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +goto :SetupDirs + :VS2022 :: Setup vars for VS2022 set __PlatformToolset=v143 diff --git a/src/Native/gen-buildsys-win.bat b/src/Native/gen-buildsys-win.bat index 4d4248200b..38c3c31434 100644 --- a/src/Native/gen-buildsys-win.bat +++ b/src/Native/gen-buildsys-win.bat @@ -30,6 +30,7 @@ if /i "%3" == "x64" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A x64) if /i "%3" == "x86" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A Win32) if /i "%3" == "arm64" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A arm64) if /i "%3" == "arm" (set __ExtraCmakeParams=%__ExtraCmakeParams% -A arm) +echo "%CMakePath%" "-DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE%" "-DCMAKE_INSTALL_PREFIX=%__CMakeBinDir%" "-DMKL_LIB_PATH=%MKL_LIB_PATH%" "-DONEDAL_DEVEL_PATH=%ONEDAL_DEVEL_PATH%" "-DONETBB_DEVEL_PATH=%ONETBB_DEVEL_PATH%" "-DARCHITECTURE=%3" -G "Visual Studio %__VSString%" %__ExtraCmakeParams% -B. -H%1 "%CMakePath%" "-DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE%" "-DCMAKE_INSTALL_PREFIX=%__CMakeBinDir%" "-DMKL_LIB_PATH=%MKL_LIB_PATH%" "-DONEDAL_DEVEL_PATH=%ONEDAL_DEVEL_PATH%" "-DONETBB_DEVEL_PATH=%ONETBB_DEVEL_PATH%" "-DARCHITECTURE=%3" -G "Visual Studio %__VSString%" %__ExtraCmakeParams% -B. 
-H%1 endlocal GOTO :DONE diff --git a/test/Directory.Build.props b/test/Directory.Build.props index 64554b2fe5..10ff4aad95 100644 --- a/test/Directory.Build.props +++ b/test/Directory.Build.props @@ -28,12 +28,12 @@ - - - - - - + + + + + + diff --git a/test/Microsoft.Data.Analysis.PerformanceTests/Microsoft.Data.Analysis.PerformanceTests.csproj b/test/Microsoft.Data.Analysis.PerformanceTests/Microsoft.Data.Analysis.PerformanceTests.csproj index 41ea50c39d..3b6b8995bd 100644 --- a/test/Microsoft.Data.Analysis.PerformanceTests/Microsoft.Data.Analysis.PerformanceTests.csproj +++ b/test/Microsoft.Data.Analysis.PerformanceTests/Microsoft.Data.Analysis.PerformanceTests.csproj @@ -8,7 +8,7 @@ - + diff --git a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj index d37043ec31..1aba751d57 100644 --- a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj +++ b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj @@ -49,10 +49,9 @@ - - - - + + + diff --git a/test/Microsoft.Extensions.ML.Tests/Microsoft.Extensions.ML.Tests.csproj b/test/Microsoft.Extensions.ML.Tests/Microsoft.Extensions.ML.Tests.csproj index d582222296..cbdfa65823 100644 --- a/test/Microsoft.Extensions.ML.Tests/Microsoft.Extensions.ML.Tests.csproj +++ b/test/Microsoft.Extensions.ML.Tests/Microsoft.Extensions.ML.Tests.csproj @@ -1,7 +1,7 @@  - + @@ -14,10 +14,8 @@ - - - - + + diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index 84018f0343..cc7bf33b65 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -21,11 +21,11 @@ - - - - - + + + + + diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/InstanceInitializerTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/InstanceInitializerTest.cs 
index a1242c9f74..d2251a780c 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/InstanceInitializerTest.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/InstanceInitializerTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Testing; using Xunit; @@ -13,7 +14,7 @@ namespace Microsoft.ML.InternalCodeAnalyzer.Tests { public sealed class InstanceInitializerTest { - [Fact] + [Fact(Skip = "The analyzer behind this test will be removed in a future PR. Disabling for now to allow version updates.")] public async Task InstanceInitializer() { const string test = @" diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/NameTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/NameTest.cs index 68d93a3d02..f7d5775fb0 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/NameTest.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/NameTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Testing; using Xunit; diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/ParameterVariableNameTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/ParameterVariableNameTest.cs index d6a655eaee..6b7e0c6406 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/ParameterVariableNameTest.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/ParameterVariableNameTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Testing; using Xunit; diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/SingleVariableDeclarationTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/SingleVariableDeclarationTest.cs index 89a15a1516..468136f7fe 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/SingleVariableDeclarationTest.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/SingleVariableDeclarationTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Testing; using Xunit; diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/TypeParamNameTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/TypeParamNameTest.cs index 4517af47ba..69f87bad1f 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Code/TypeParamNameTest.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Code/TypeParamNameTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Testing; using Xunit; diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CSharpCodeFixVerifier`2.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CSharpCodeFixVerifier`2.cs index 5a72337a68..351fb05d77 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CSharpCodeFixVerifier`2.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CSharpCodeFixVerifier`2.cs @@ -18,6 +18,7 @@ internal static class CSharpCodeFixVerifier where TCodeFix : CodeFixProvider, new() { public static DiagnosticResult Diagnostic() +#pragma warning disable CS0618 // Type or member is obsolete => CSharpCodeFixVerifier.Diagnostic(); public static DiagnosticResult Diagnostic(string diagnosticId) @@ -25,6 +26,7 @@ public static DiagnosticResult Diagnostic(string diagnosticId) public static DiagnosticResult Diagnostic(DiagnosticDescriptor descriptor) => CSharpCodeFixVerifier.Diagnostic(descriptor); +#pragma warning restore CS0618 // Type or member is obsolete public static async Task VerifyAnalyzerAsync(string source, params DiagnosticResult[] expected) { @@ -55,7 +57,9 @@ public static async Task VerifyCodeFixAsync(string source, DiagnosticResult[] ex await test.RunAsync(); } +#pragma warning disable CS0618 // Type or member is obsolete internal class Test : CSharpCodeFixTest +#pragma warning restore CS0618 // Type or member is obsolete { public Test() { diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj index fe86711df0..4e7bff36b7 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj @@ -3,11 +3,13 @@ false + + $(NoWarn);RS1036;RS1038;RS1041 - - - + + + diff --git a/test/Microsoft.ML.CodeGenerator.Tests/Microsoft.ML.CodeGenerator.Tests.csproj 
b/test/Microsoft.ML.CodeGenerator.Tests/Microsoft.ML.CodeGenerator.Tests.csproj index 8360a90458..207bb4e6c3 100644 --- a/test/Microsoft.ML.CodeGenerator.Tests/Microsoft.ML.CodeGenerator.Tests.csproj +++ b/test/Microsoft.ML.CodeGenerator.Tests/Microsoft.ML.CodeGenerator.Tests.csproj @@ -10,8 +10,8 @@ - - + + diff --git a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj index 71e10d7501..8484d27c9a 100644 --- a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj +++ b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj @@ -35,9 +35,9 @@ - - - + + + diff --git a/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj b/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj index bf597d4489..b1f4b3cc7b 100644 --- a/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj +++ b/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj @@ -22,7 +22,7 @@ - + diff --git a/test/Microsoft.ML.CpuMath.UnitTests/Microsoft.ML.CpuMath.UnitTests.csproj b/test/Microsoft.ML.CpuMath.UnitTests/Microsoft.ML.CpuMath.UnitTests.csproj index e7a146c72f..911d1e6962 100644 --- a/test/Microsoft.ML.CpuMath.UnitTests/Microsoft.ML.CpuMath.UnitTests.csproj +++ b/test/Microsoft.ML.CpuMath.UnitTests/Microsoft.ML.CpuMath.UnitTests.csproj @@ -3,7 +3,7 @@ - + diff --git a/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj b/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj index 40929bf38a..edc8502754 100644 --- a/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj +++ b/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj @@ -30,4 +30,8 @@ + + + + \ No newline at end of file diff --git a/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj index 
a986b823a1..9449caa039 100644 --- a/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj +++ b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj @@ -10,9 +10,9 @@ - - - + + + diff --git a/test/Microsoft.ML.GenAI.Core.Tests/CasualLMDatasetTest.cs b/test/Microsoft.ML.GenAI.Core.Tests/CausalLMDatasetTest.cs similarity index 99% rename from test/Microsoft.ML.GenAI.Core.Tests/CasualLMDatasetTest.cs rename to test/Microsoft.ML.GenAI.Core.Tests/CausalLMDatasetTest.cs index f451dcb718..25b8874d7d 100644 --- a/test/Microsoft.ML.GenAI.Core.Tests/CasualLMDatasetTest.cs +++ b/test/Microsoft.ML.GenAI.Core.Tests/CausalLMDatasetTest.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.GenAI.Core.Tests; -public class CasualLMDatasetTest +public class CausalLMDatasetTest { private static Tokenizer CreateLlamaTokenizer() { diff --git a/test/Microsoft.ML.GenAI.Core.Tests/Microsoft.ML.GenAI.Core.Tests.csproj b/test/Microsoft.ML.GenAI.Core.Tests/Microsoft.ML.GenAI.Core.Tests.csproj index 90d312095e..afdfe94e5a 100644 --- a/test/Microsoft.ML.GenAI.Core.Tests/Microsoft.ML.GenAI.Core.Tests.csproj +++ b/test/Microsoft.ML.GenAI.Core.Tests/Microsoft.ML.GenAI.Core.Tests.csproj @@ -15,22 +15,23 @@ - - - - - - + + + + + + + - + - - - + + + diff --git a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj index e0499f49c8..75b3c41c0b 100644 --- a/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj +++ b/test/Microsoft.ML.GenAI.LLaMA.Tests/Microsoft.ML.GenAI.LLaMA.Tests.csproj @@ -15,13 +15,14 @@ - - - - - - - + + + + + + + + @@ -38,9 +39,9 @@ - - - + + + diff --git a/test/Microsoft.ML.GenAI.Mistral.Tests/Microsoft.ML.GenAI.Mistral.Tests.csproj b/test/Microsoft.ML.GenAI.Mistral.Tests/Microsoft.ML.GenAI.Mistral.Tests.csproj index 1f1c34453f..4c4b492447 100644 --- a/test/Microsoft.ML.GenAI.Mistral.Tests/Microsoft.ML.GenAI.Mistral.Tests.csproj +++ 
b/test/Microsoft.ML.GenAI.Mistral.Tests/Microsoft.ML.GenAI.Mistral.Tests.csproj @@ -15,13 +15,13 @@ - - - - - - - + + + + + + + @@ -37,9 +37,9 @@ - - - + + + diff --git a/test/Microsoft.ML.GenAI.Phi.Tests/AutoGenTests.cs b/test/Microsoft.ML.GenAI.Phi.Tests/AutoGenTests.cs index 33ab565fe7..6f0928f3c0 100644 --- a/test/Microsoft.ML.GenAI.Phi.Tests/AutoGenTests.cs +++ b/test/Microsoft.ML.GenAI.Phi.Tests/AutoGenTests.cs @@ -16,7 +16,7 @@ public class AutoGenTests [Fact] public async Task ItGenerateTextReply() { - var pipeline = Mock.Of>(); + var pipeline = Mock.Of>(); // mock generate api Mock.Get(pipeline).Setup(p => p.Generate( It.IsAny(), // prompt diff --git a/test/Microsoft.ML.GenAI.Phi.Tests/Microsoft.ML.GenAI.Phi.Tests.csproj b/test/Microsoft.ML.GenAI.Phi.Tests/Microsoft.ML.GenAI.Phi.Tests.csproj index 086654f79c..62d6260523 100644 --- a/test/Microsoft.ML.GenAI.Phi.Tests/Microsoft.ML.GenAI.Phi.Tests.csproj +++ b/test/Microsoft.ML.GenAI.Phi.Tests/Microsoft.ML.GenAI.Phi.Tests.csproj @@ -16,13 +16,14 @@ - - - - - - - + + + + + + + + @@ -37,9 +38,9 @@ - - - + + + diff --git a/test/Microsoft.ML.GenAI.Phi.Tests/Phi2Tests.cs b/test/Microsoft.ML.GenAI.Phi.Tests/Phi2Tests.cs index 33402e73a0..a196d8ab81 100644 --- a/test/Microsoft.ML.GenAI.Phi.Tests/Phi2Tests.cs +++ b/test/Microsoft.ML.GenAI.Phi.Tests/Phi2Tests.cs @@ -30,7 +30,7 @@ public Phi2Tests() [UseApprovalSubdirectory("Approvals")] public void LoadSafeTensorShapeTest() { - var model = new Phi2ForCasualLM(Phi2Config.Phi2); + var model = new Phi2ForCausalLM(Phi2Config.Phi2); var stateDictStr = model.PeekShape(); Approvals.Verify(stateDictStr); } diff --git a/test/Microsoft.ML.GenAI.Phi.Tests/Phi3Tests.cs b/test/Microsoft.ML.GenAI.Phi.Tests/Phi3Tests.cs index 1200d79f9d..aec4016fd7 100644 --- a/test/Microsoft.ML.GenAI.Phi.Tests/Phi3Tests.cs +++ b/test/Microsoft.ML.GenAI.Phi.Tests/Phi3Tests.cs @@ -32,7 +32,7 @@ public Phi3Tests() [UseApprovalSubdirectory("Approvals")] public void Phi3Mini4KShapeTest() { - var model = 
new Phi3ForCasualLM(Phi3Config.Phi3Mini4kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Mini4kInstruct); var stateDictStr = model.PeekShape(); Approvals.Verify(stateDictStr); } @@ -42,7 +42,7 @@ public void Phi3Mini4KShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Mini4KInt8QuantizeShapeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Mini4kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Mini4kInstruct); model.ToInt8QuantizeModule(); var size = model.GetSizeInBytes(); var stateDictStr = model.PeekShape(); @@ -56,7 +56,7 @@ public void Phi3Mini4KInt8QuantizeShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Mini4KInt4QuantizeShapeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Mini4kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Mini4kInstruct); model.ToInt4QuantizeModule(); var size = model.GetSizeInBytes(); var stateDictStr = model.PeekShape(); @@ -70,7 +70,7 @@ public void Phi3Mini4KInt4QuantizeShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Medium4KShapeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Medium4kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Medium4kInstruct); var stateDictStr = model.PeekShape(); Approvals.Verify(stateDictStr); } @@ -81,7 +81,7 @@ public void Phi3Medium4KShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Medium128KShapeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Medium128kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Medium128kInstruct); var stateDictStr = model.PeekShape(); Approvals.Verify(stateDictStr); } @@ -91,7 +91,7 @@ public void Phi3Medium128KShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Mini128KShapeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Mini128kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Mini128kInstruct); var stateDictStr = model.PeekShape(); Approvals.Verify(stateDictStr); } @@ -101,7 
+101,7 @@ public void Phi3Mini128KShapeTest() [UseApprovalSubdirectory("Approvals")] public void Phi3Mini128KLayerSizeTest() { - var model = new Phi3ForCasualLM(Phi3Config.Phi3Mini128kInstruct); + var model = new Phi3ForCausalLM(Phi3Config.Phi3Mini128kInstruct); var size = model.GetSizeForEachDynamicLayerInBytes(); // convert size to MB var sizeInMB = size.ToDictionary(x => x.Key, x => x.Value / 1024 / 1024); diff --git a/test/Microsoft.ML.GenAI.Phi.Tests/SemanticKernelTests.cs b/test/Microsoft.ML.GenAI.Phi.Tests/SemanticKernelTests.cs index 98359a8722..63777bc3bf 100644 --- a/test/Microsoft.ML.GenAI.Phi.Tests/SemanticKernelTests.cs +++ b/test/Microsoft.ML.GenAI.Phi.Tests/SemanticKernelTests.cs @@ -19,7 +19,7 @@ public class SemanticKernelTests [Fact] public async Task ItAddPhi3CausalLMChatCompletionServiceTestAsync() { - var pipeline = Mock.Of>(); + var pipeline = Mock.Of>(); // mock generate api Mock.Get(pipeline).Setup(p => p.Generate( It.IsAny(), // prompt @@ -55,7 +55,7 @@ public async Task ItAddPhi3CausalLMChatCompletionServiceTestAsync() [Fact] public async Task ItAddPhi3CausalLMTextGenerationServiceTestAsync() { - var pipeline = Mock.Of>(); + var pipeline = Mock.Of>(); // mock generate api Mock.Get(pipeline).Setup(p => p.Generate( It.IsAny(), // prompt diff --git a/test/Microsoft.ML.IntegrationTests/Microsoft.ML.IntegrationTests.csproj b/test/Microsoft.ML.IntegrationTests/Microsoft.ML.IntegrationTests.csproj index bafda75fba..57dc804271 100644 --- a/test/Microsoft.ML.IntegrationTests/Microsoft.ML.IntegrationTests.csproj +++ b/test/Microsoft.ML.IntegrationTests/Microsoft.ML.IntegrationTests.csproj @@ -45,8 +45,8 @@ - - + + diff --git a/test/Microsoft.ML.NightlyBuild.Tests/Microsoft.ML.NightlyBuild.Tests.csproj b/test/Microsoft.ML.NightlyBuild.Tests/Microsoft.ML.NightlyBuild.Tests.csproj index 6f9bc4859c..a721bb7587 100644 --- a/test/Microsoft.ML.NightlyBuild.Tests/Microsoft.ML.NightlyBuild.Tests.csproj +++ 
b/test/Microsoft.ML.NightlyBuild.Tests/Microsoft.ML.NightlyBuild.Tests.csproj @@ -33,8 +33,8 @@ - - + + diff --git a/test/Microsoft.ML.OnnxTransformerTest/Microsoft.ML.OnnxTransformerTest.csproj b/test/Microsoft.ML.OnnxTransformerTest/Microsoft.ML.OnnxTransformerTest.csproj index 2f566613c8..af52872237 100644 --- a/test/Microsoft.ML.OnnxTransformerTest/Microsoft.ML.OnnxTransformerTest.csproj +++ b/test/Microsoft.ML.OnnxTransformerTest/Microsoft.ML.OnnxTransformerTest.csproj @@ -15,8 +15,8 @@ - - + + diff --git a/test/Microsoft.ML.PerformanceTests/Microsoft.ML.PerformanceTests.csproj b/test/Microsoft.ML.PerformanceTests/Microsoft.ML.PerformanceTests.csproj index 55b9b7c6d4..61e0928be8 100644 --- a/test/Microsoft.ML.PerformanceTests/Microsoft.ML.PerformanceTests.csproj +++ b/test/Microsoft.ML.PerformanceTests/Microsoft.ML.PerformanceTests.csproj @@ -12,10 +12,9 @@ - - - - + + + diff --git a/test/Microsoft.ML.SearchSpace.Tests/Microsoft.ML.SearchSpace.Tests.csproj b/test/Microsoft.ML.SearchSpace.Tests/Microsoft.ML.SearchSpace.Tests.csproj index 1aa8bba589..866f365ab1 100644 --- a/test/Microsoft.ML.SearchSpace.Tests/Microsoft.ML.SearchSpace.Tests.csproj +++ b/test/Microsoft.ML.SearchSpace.Tests/Microsoft.ML.SearchSpace.Tests.csproj @@ -5,9 +5,9 @@ - - - + + + diff --git a/test/Microsoft.ML.TensorFlow.Tests/Microsoft.ML.TensorFlow.Tests.csproj b/test/Microsoft.ML.TensorFlow.Tests/Microsoft.ML.TensorFlow.Tests.csproj index 78085851c2..6f605d67bd 100644 --- a/test/Microsoft.ML.TensorFlow.Tests/Microsoft.ML.TensorFlow.Tests.csproj +++ b/test/Microsoft.ML.TensorFlow.Tests/Microsoft.ML.TensorFlow.Tests.csproj @@ -19,8 +19,8 @@ - - + + diff --git a/test/Microsoft.ML.TensorFlow.Tests/TensorFlowEstimatorTests.cs b/test/Microsoft.ML.TensorFlow.Tests/TensorFlowEstimatorTests.cs index 255d198eae..18fdfc66ba 100644 --- a/test/Microsoft.ML.TensorFlow.Tests/TensorFlowEstimatorTests.cs +++ b/test/Microsoft.ML.TensorFlow.Tests/TensorFlowEstimatorTests.cs @@ -208,7 +208,8 @@ public void 
TreatOutputAsBatched() .Append(ML.Model.LoadTensorFlowModel(modelLocation, false).ScoreTensorFlowModel("Output", "Input")); TestEstimatorCore(pipe, data); - var schema = pipe.Fit(data).Transform(data).Schema; + using var pipelineModel = pipe.Fit(data); + var schema = pipelineModel.Transform(data).Schema; // The dimensions of the output with treatOutputAsBatched set to false should be * 10 // as the first dimension of -1 is treated as an unknown dimension. @@ -222,7 +223,8 @@ public void TreatOutputAsBatched() .Append(ML.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel("Output", "Input")); TestEstimatorCore(pipe, data); - schema = pipe.Fit(data).Transform(data).Schema; + using var pipelineModelBatched = pipe.Fit(data); + schema = pipelineModelBatched.Transform(data).Schema; // The dimensions of the output with treatOutputAsBatched set to true should be 10 // as the first dimension of -1 is treated as the batch dimension. @@ -285,8 +287,8 @@ public void TestLoadMultipleModel() MLContext context = new MLContext(seed: 1); - TensorFlowModel model1 = context.Model.LoadTensorFlowModel(modelFile1); - TensorFlowModel model2 = context.Model.LoadTensorFlowModel(modelFile2); + using TensorFlowModel model1 = context.Model.LoadTensorFlowModel(modelFile1); + using TensorFlowModel model2 = context.Model.LoadTensorFlowModel(modelFile2); model1.ScoreTensorFlowModel(new[] { "c" }, new[] { "a", "b" }); model2.ScoreTensorFlowModel("Output", "Input"); diff --git a/test/Microsoft.ML.TensorFlow.Tests/TensorflowTests.cs b/test/Microsoft.ML.TensorFlow.Tests/TensorflowTests.cs index 16bc4a6b74..d9d362d020 100644 --- a/test/Microsoft.ML.TensorFlow.Tests/TensorflowTests.cs +++ b/test/Microsoft.ML.TensorFlow.Tests/TensorflowTests.cs @@ -18,6 +18,7 @@ using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Image; using Microsoft.ML.Vision; +using Tensorflow; using Xunit; using Xunit.Abstractions; using static Microsoft.ML.DataOperationsCatalog; @@ -1187,7 +1188,9 @@ public void 
TensorFlowSaveAndLoadSavedModel() predictFunction.Dispose(); // Reload the model and check the output schema consistency +#pragma warning disable IDE0055 DataViewSchema loadedInputschema; +#pragma warning restore IDE0055 var testTransformer = _mlContext.Model.Load(mlModelLocation, out loadedInputschema); var testOutputSchema = transformer.GetOutputSchema(data.Schema); Assert.True(TestCommon.CheckSameSchemas(outputSchema, testOutputSchema)); @@ -2055,7 +2058,7 @@ public void TensorflowPlaceholderShapeInferenceTest() new TextLoader.Column("name", DataKind.String, 1) }); - Tensorflow.TensorShape[] tfInputShape; + Tensorflow.Shape[] tfInputShape; using (var tfModel = _mlContext.Model.LoadTensorFlowModel(modelLocation)) { @@ -2070,8 +2073,8 @@ public void TensorflowPlaceholderShapeInferenceTest() transformer.Dispose(); } - Assert.Equal(imageHeight, tfInputShape.ElementAt(0)[1].dims[0]); - Assert.Equal(imageWidth, tfInputShape.ElementAt(0)[2].dims[0]); + Assert.Equal(imageHeight, tfInputShape.ElementAt(0)[Slice.Index(1)].dims[0]); + Assert.Equal(imageWidth, tfInputShape.ElementAt(0)[Slice.Index(2)].dims[0]); } } } diff --git a/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj b/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj index 53783ee35b..340bcbf063 100644 --- a/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj +++ b/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj @@ -33,7 +33,7 @@ - + diff --git a/test/Microsoft.ML.TestFrameworkCommon/Microsoft.ML.TestFrameworkCommon.csproj b/test/Microsoft.ML.TestFrameworkCommon/Microsoft.ML.TestFrameworkCommon.csproj index 33049ea360..3fd2a833d3 100644 --- a/test/Microsoft.ML.TestFrameworkCommon/Microsoft.ML.TestFrameworkCommon.csproj +++ b/test/Microsoft.ML.TestFrameworkCommon/Microsoft.ML.TestFrameworkCommon.csproj @@ -9,8 +9,8 @@ - - + + diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 
daf2eb2daf..0ca7369d95 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -47,14 +47,13 @@ - - - - - - - - + + + + + + + diff --git a/test/Microsoft.ML.Tokenizers.Tests/BpeTests.cs b/test/Microsoft.ML.Tokenizers.Tests/BpeTests.cs index 6a0bdcb52b..5c2da4aece 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/BpeTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/BpeTests.cs @@ -257,6 +257,19 @@ public void SimpleTestWithUnknownToken( continuingSubwordPrefix: continuingSubwordPrefix, endOfWordSuffix: endOfWordSuffix, fuseUnknownTokens: fuseUnknownToken); SimpleWithUnknownTokenTest(bpe, sentence, offsets, ids, expectedTokens, decodedTokens, decodedTokensWithoutUnknownToken); + + BpeOptions bpeOptions = new BpeOptions(vocabFile, mergesFile) + { + PreTokenizer = PreTokenizer.CreateWordOrNonWord(), + Normalizer = null, + UnknownToken = unknownToken, + ContinuingSubwordPrefix = continuingSubwordPrefix, + EndOfWordSuffix = endOfWordSuffix, + FuseUnknownTokens = fuseUnknownToken + }; + + bpe = BpeTokenizer.Create(bpeOptions); + SimpleWithUnknownTokenTest(bpe, sentence, offsets, ids, expectedTokens, decodedTokens, decodedTokensWithoutUnknownToken); } finally { @@ -267,7 +280,7 @@ public void SimpleTestWithUnknownToken( } } - BpeOptions bpeOptions = new BpeOptions(vocab.Select(kvp => (kvp.Key, kvp.Value))) + BpeOptions bpeOptions1 = new BpeOptions(vocab) { Merges = merges?.Select(kvp => $"{kvp.Item1} {kvp.Item2}"), PreTokenizer = PreTokenizer.CreateWordOrNonWord(), @@ -278,7 +291,7 @@ public void SimpleTestWithUnknownToken( FuseUnknownTokens = fuseUnknownToken }; - BpeTokenizer bpe1 = BpeTokenizer.Create(bpeOptions); + BpeTokenizer bpe1 = BpeTokenizer.Create(bpeOptions1); SimpleWithUnknownTokenTest(bpe1, sentence, offsets, ids, expectedTokens, decodedTokens, decodedTokensWithoutUnknownToken); } @@ -387,7 +400,7 @@ public async Task TestBpeCreation() Dictionary? 
dictionary = JsonSerializer.Deserialize>(jsonString); bpe = BpeTokenizer.Create( - new BpeOptions(dictionary!.Select(kvp => (kvp.Key, kvp.Value))) + new BpeOptions(dictionary!) { Merges = File.ReadAllLines(mergesFile).Skip(1).ToArray() // Skip the first line which is the header "#version". }); @@ -872,6 +885,66 @@ public void TestDeepSeekR1Tokenizer(string text, int[] ids, string[] tokens, (in Assert.Equal(text, tokenizer.Decode(ids, considerSpecialTokens: false)); } + [Fact] + public void TestTokenizerWithSpecialTokens() + { + // "https://huggingface.co/openai-community/gpt2/raw/main/vocab.json"; + // "https://huggingface.co/openai-community/gpt2/raw/main/merges.txt"; + + BpeOptions options = new BpeOptions(Path.Combine(@"Gpt-2", "vocab.json"), Path.Combine(@"Gpt-2", "merges.txt")) + { + UnknownToken = "unk", + + SpecialTokens = new Dictionary // SpecialTokens not part of the original vocab.json + { + { "<|sos|>", 50257 }, + { "<|eos|>", 50258 } + }, + BeginningOfSentenceToken = "<|sos|>", + EndOfSentenceToken = "<|eos|>" + }; + + BpeTokenizer bpeTokenizer = BpeTokenizer.Create(options); + Assert.True(bpeTokenizer.Vocabulary.TryGetValue(options.UnknownToken, out int unkId)); + + string text = "Hello world!\uD800"; + + var ids = bpeTokenizer.EncodeToIds(text, considerPreTokenization: false); + Assert.Equal([50257, 15496, 2954, 6894, 0, 2954, 50258], ids); // space and u+D800 couldn't be encoded and produced unk tokens + Assert.Equal(unkId, ids[ids.Count - 2]); + Assert.Equal(options.SpecialTokens["<|sos|>"], ids[0]); + Assert.Equal(options.SpecialTokens["<|eos|>"], ids[^1]); + + var tokens = bpeTokenizer.EncodeToTokens(text, out _, considerPreTokenization: false).Select(t => t.Value).ToArray(); + Assert.Equal(["<|sos|>", "Hello", "unk", "world", "!", "unk", "<|eos|>"], tokens); + + Assert.Equal("<|sos|>Hellounkworld!unk<|eos|>", bpeTokenizer.Decode(ids)); + Assert.Equal("Helloworld!", bpeTokenizer.Decode(ids, considerSpecialTokens: false)); + + 
BpeOptions options1 = new BpeOptions(options.Vocabulary) + { + // Null UnknownToken means no unknown token support + Merges = options.Merges, + SpecialTokens = options.SpecialTokens, + BeginningOfSentenceToken = options.BeginningOfSentenceToken, + EndOfSentenceToken = options.EndOfSentenceToken + }; + + bpeTokenizer = BpeTokenizer.Create(options1); + ids = bpeTokenizer.EncodeToIds(text, considerPreTokenization: false); + + // Because Unknown is not supported in this encoding, the encoding will produce different encoding results + Assert.Equal([50257, 39, 5037, 1764, 0, 50258], ids); + Assert.Equal(options.SpecialTokens["<|sos|>"], ids[0]); + Assert.Equal(options.SpecialTokens["<|eos|>"], ids[^1]); + + tokens = bpeTokenizer.EncodeToTokens(text, out _, considerPreTokenization: false).Select(t => t.Value).ToArray(); + Assert.Equal(["<|sos|>", "H", "ellow", "orld", "!", "<|eos|>"], tokens); + + Assert.Equal("<|sos|>Helloworld!<|eos|>", bpeTokenizer.Decode(ids)); + Assert.Equal("Helloworld!", bpeTokenizer.Decode(ids, considerSpecialTokens: false)); + } + private static BpeTokenizer CreateBpeTokenizerFromJson() { // @"https://huggingface.co/deepseek-ai/DeepSeek-R1/resolve/main/tokenizer.json?download=true" @@ -928,11 +1001,11 @@ private static BpeTokenizer CreateBpeTokenizerFromJson() return BpeTokenizer.Create(bpeOptions); } - private static IEnumerable<(string Token, int Id)> GetVocabulary(JsonElement vocabElement) + private static IEnumerable> GetVocabulary(JsonElement vocabElement) { foreach (JsonProperty token in vocabElement.EnumerateObject()) { - yield return (token.Name, token.Value.GetInt32()); + yield return new KeyValuePair(token.Name, token.Value.GetInt32()); } } diff --git a/test/Microsoft.ML.Tokenizers.Tests/Microsoft.ML.Tokenizers.Tests.csproj b/test/Microsoft.ML.Tokenizers.Tests/Microsoft.ML.Tokenizers.Tests.csproj index c87b6d143d..fc20a69291 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/Microsoft.ML.Tokenizers.Tests.csproj +++ 
b/test/Microsoft.ML.Tokenizers.Tests/Microsoft.ML.Tokenizers.Tests.csproj @@ -66,8 +66,8 @@ - - + + \ No newline at end of file diff --git a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs index 049f2cd82b..e7a0bf5acc 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs @@ -35,7 +35,9 @@ public class TiktokenTests public static Tokenizer R50kBase { get; } = TiktokenTokenizer.CreateForModel("ada"); public static Tokenizer P50kEdit { get; } = TiktokenTokenizer.CreateForModel("text-davinci-edit-001"); public static Tokenizer GPT4o { get; } = TiktokenTokenizer.CreateForModel("gpt-4o"); + public static Tokenizer GPT5 { get; } = TiktokenTokenizer.CreateForModel("gpt-5"); public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4"); + public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b"); [Fact] public async Task TestTokenizerCreation() @@ -282,40 +284,43 @@ public void TestEncode5() } [Fact] - public void TestEncodeGpt4o() + public void TestEncodeO200kBaseEncoding() { - string text = ReadAndSanitizeFile("./Data/lib.rs.txt"); - IReadOnlyList encoded = GPT4o.EncodeToIds(text); - int idsCount = GPT4o.CountTokens(text); + foreach (TiktokenTokenizer tokenizer in new[] { GPT4o, GptOss, GPT5 }) + { + string text = ReadAndSanitizeFile("./Data/lib.rs.txt"); + IReadOnlyList encoded = tokenizer.EncodeToIds(text); + int idsCount = tokenizer.CountTokens(text); - Assert.Equal(5609, encoded.Count); - Assert.Equal(encoded.Count, idsCount); + Assert.Equal(5609, encoded.Count); + Assert.Equal(encoded.Count, idsCount); - using (Stream stream = File.OpenRead("./Data/tokens_gpt4o.json")) - { - int[]? expected = JsonSerializer.Deserialize(stream) as int[]; - Assert.Equal(expected!, encoded); - } + using (Stream stream = File.OpenRead("./Data/tokens_gpt4o.json")) + { + int[]? 
expected = JsonSerializer.Deserialize(stream) as int[]; + Assert.Equal(expected!, encoded); + } - Assert.Equal(text, GPT4o.Decode(encoded)); - TestDecodingWithSpan((GPT4o as TiktokenTokenizer)!, encoded.ToArray(), text); + Assert.Equal(text, tokenizer.Decode(encoded)); + TestDecodingWithSpan(tokenizer, encoded.ToArray(), text); - text = "<|endoftext|>Hello ⭐ World<|endofprompt|>"; + text = "<|endoftext|>Hello ⭐ World<|endofprompt|>"; - encoded = GPT4o.EncodeToIds(text); - idsCount = GPT4o.CountTokens(text); - Assert.Equal(new List() { 199999, 13225, 161181, 5922, 200018 }, encoded); - Assert.Equal(text, GPT4o.Decode(encoded)); - TestDecodingWithSpan((GPT4o as TiktokenTokenizer)!, encoded.ToArray(), text); + encoded = tokenizer.EncodeToIds(text); + idsCount = tokenizer.CountTokens(text); + Assert.Equal(new List() { 199999, 13225, 161181, 5922, 200018 }, encoded); + Assert.Equal(text, tokenizer.Decode(encoded)); + TestDecodingWithSpan(tokenizer, encoded.ToArray(), text); - IReadOnlyList result = GPT4o.EncodeToTokens(text, out string? normalizedText); + IReadOnlyList result = tokenizer.EncodeToTokens(text, out string? 
normalizedText); - Assert.Equal(encoded, result.Select(token => token.Id).ToArray()); - Assert.Equal(encoded.Count, idsCount); - Assert.Equal(new string[] { "<|endoftext|>", "Hello", " ⭐", " World", "<|endofprompt|>" }, result.Select(token => token.Value).ToArray()); - Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); + Assert.Equal(encoded, result.Select(token => token.Id).ToArray()); + Assert.Equal(encoded.Count, idsCount); + Assert.Equal(new string[] { "<|endoftext|>", "Hello", " ⭐", " World", "<|endofprompt|>" }, result.Select(token => token.Value).ToArray()); + Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); - TokenizerTests.TestTokenLimits(GPT4o); + TokenizerTests.TestTokenLimits(tokenizer); + } } [Fact] @@ -398,13 +403,22 @@ public void TestEncodeR50kBase() [InlineData("o1")] [InlineData("o1-")] [InlineData("o1-mini")] + [InlineData("o4-mini-")] [InlineData("o3")] [InlineData("o3-")] [InlineData("o3-mini")] + [InlineData("o4-mini")] + [InlineData("gpt-4.1")] + [InlineData("gpt-4.1-mini")] + [InlineData("gpt-4.5-")] [InlineData("gpt-4o")] [InlineData("gpt-4o-")] + [InlineData("gpt-5")] + [InlineData("gpt-5-chat")] + [InlineData("chatgpt-4o-")] [InlineData("gpt-4")] [InlineData("gpt-4-")] + [InlineData("gpt-3.5")] [InlineData("gpt-3.5-")] [InlineData("gpt-3.5-turbo")] [InlineData("gpt-3.5-turbo-")] @@ -421,8 +435,10 @@ public void TestEncodeR50kBase() [InlineData("text-babbage-001")] [InlineData("text-ada-001")] [InlineData("davinci")] + [InlineData("davinci-002")] [InlineData("curie")] [InlineData("babbage")] + [InlineData("babbage-002")] [InlineData("ada")] [InlineData("code-davinci-002")] [InlineData("code-davinci-001")] @@ -446,7 +462,16 @@ public void 
TestEncodeR50kBase() [InlineData("code-search-babbage-code-001")] [InlineData("code-search-ada-code-001")] [InlineData("gpt2")] + [InlineData("gpt-2")] [InlineData("phi-4")] + [InlineData("gpt-oss-")] + [InlineData("gpt-oss-120b")] + [InlineData("gpt-oss-20b")] + [InlineData("ft:gpt-4o")] + [InlineData("ft:gpt-4")] + [InlineData("ft:gpt-3.5-turbo")] + [InlineData("ft:davinci-002")] + [InlineData("ft:babbage-002")] public void TestAllSupportedModelNames(string modelName) { Tokenizer tokenizer = TiktokenTokenizer.CreateForModel(modelName); @@ -460,6 +485,7 @@ public void TestAllSupportedModelNames(string modelName) [InlineData("p50k_edit")] [InlineData("cl100k_base")] [InlineData("o200k_base")] + [InlineData("o200k_harmony")] public void TestAllSupportedEncodingNames(string encodingName) { Tokenizer tokenizer = TiktokenTokenizer.CreateForEncoding(encodingName); @@ -473,6 +499,7 @@ public void TestAllSupportedEncodingNames(string encodingName) "p50k_edit" => "text-davinci-edit-001", "cl100k_base" => "gpt-4", "o200k_base" => "gpt-4o", + "o200k_harmony" => "gpt-oss-120b", _ => throw new ArgumentException("Invalid encoding name"), }; @@ -499,16 +526,21 @@ public void TestEncodingNamesNegativeCases() Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("p50k_edit_")); Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("cl100k_base_")); Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("o200k_base_")); + Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("o200k_harmony_")); } [InlineData("gpt-4")] + [InlineData("gpt-4.1")] [InlineData("gpt-4o")] + [InlineData("gpt-5")] [InlineData("o1")] [InlineData("o3")] + [InlineData("o4-mini")] [InlineData("text-davinci-003")] [InlineData("text-curie-001")] [InlineData("text-davinci-edit-001")] [InlineData("phi-4")] + [InlineData("gpt-oss-20b")] [ConditionalTheory(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))] public void TestCreationUsingModel(string modelName) { @@ -752,6 +784,53 @@ public void 
TestPhi4SpecialCases() Assert.Equal(text, Phi4.Decode(encoded)); } + [Fact] + public void TestOss() + { + Assert.Equal( + new Dictionary + { + { "<|startoftext|>", 199998 }, + { "<|endoftext|>", 199999 }, + { "<|reserved_200000|>", 200000 }, + { "<|reserved_200001|>", 200001 }, + { "<|return|>", 200002 }, + { "<|constrain|>", 200003 }, + { "<|reserved_200004|>", 200004 }, + { "<|channel|>", 200005 }, + { "<|start|>", 200006 }, + { "<|end|>", 200007 }, + { "<|message|>", 200008 }, + { "<|reserved_200009|>", 200009 }, + { "<|reserved_200010|>", 200010 }, + { "<|reserved_200011|>", 200011 }, + { "<|call|>", 200012 }, + { "<|reserved_200013|>", 200013 }, + { "<|reserved_200014|>", 200014 }, + { "<|reserved_200015|>", 200015 }, + { "<|reserved_200016|>", 200016 }, + { "<|reserved_200017|>", 200017 }, + { "<|endofprompt|>", 200018 }, + }, GptOss.SpecialTokens); + + string text = "<|startoftext|><|start|><|message|>Hello World<|end|><|endoftext|>"; + + IReadOnlyList ids = GptOss.EncodeToIds(text); + + Assert.Equal( + new List { 199998, 200006, 200008, 13225, 5922, 200007, 199999 }, + ids); + Assert.Equal(text, GptOss.Decode(ids)); + + Assert.Equal(new string[] { "<|startoftext|>", "<|start|>", "<|message|>", "Hello", " World", "<|end|>", "<|endoftext|>" }, + GptOss.EncodeToTokens(text, out _).Select(t => t.Value).ToArray()); + + Assert.Equal(new List<(int, int)> { (0, 15), (15, 24), (24, 35), (35, 40), (40, 46), (46, 53), (53, 66) }, + GptOss.EncodeToTokens(text, out _).Select(t => (t.Offset.Start.Value, t.Offset.End.Value)).ToList()); + + Assert.Equal(ids, GptOss.EncodeToTokens(text, out _).Select(t => t.Id).ToList()); + } + // We are not exposing the Encoder, Decoder, or Vocabulary so far. For now, use reflection to test it. private static IReadOnlyDictionary, int>? 
GetEncoder(TiktokenTokenizer tiktoken) => typeof(TiktokenTokenizer).GetProperty("Encoder", BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(tiktoken) as IReadOnlyDictionary, int>; diff --git a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj index 5fed431948..7e28551e01 100644 --- a/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj +++ b/test/Microsoft.ML.TorchSharp.Tests/Microsoft.ML.TorchSharp.Tests.csproj @@ -29,12 +29,12 @@ - - - - + + + + - + diff --git a/test/NightlyBuildDependency.props b/test/NightlyBuildDependency.props index 7928333c4b..bb5a3d9618 100644 --- a/test/NightlyBuildDependency.props +++ b/test/NightlyBuildDependency.props @@ -2,15 +2,15 @@ - - - - - - - - - - + + + + + + + + + + diff --git a/test/TestFrameworkDependency.props b/test/TestFrameworkDependency.props index eec1c9c73b..368a3dfb4a 100644 --- a/test/TestFrameworkDependency.props +++ b/test/TestFrameworkDependency.props @@ -5,6 +5,6 @@ - + diff --git a/tools-local/Microsoft.ML.AutoML.SourceGenerator/Microsoft.ML.AutoML.SourceGenerator.csproj b/tools-local/Microsoft.ML.AutoML.SourceGenerator/Microsoft.ML.AutoML.SourceGenerator.csproj index 575b12c03e..bf7b6c8af5 100644 --- a/tools-local/Microsoft.ML.AutoML.SourceGenerator/Microsoft.ML.AutoML.SourceGenerator.csproj +++ b/tools-local/Microsoft.ML.AutoML.SourceGenerator/Microsoft.ML.AutoML.SourceGenerator.csproj @@ -6,16 +6,17 @@ $(GetTargetPathDependsOn);GetDependencyTargetPaths false true + + $(NoWarn);RS1042 - - - - - - - + + + + + + diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BaseTestClassAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BaseTestClassAnalyzer.cs index ed45d59a72..ab39be9a01 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BaseTestClassAnalyzer.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BaseTestClassAnalyzer.cs @@ -17,7 +17,7 @@ public sealed class 
BaseTestClassAnalyzer : DiagnosticAnalyzer internal const string DiagnosticId = "MSML_ExtendBaseTestClass"; private const string Title = "Test classes should be derived from BaseTestClass or FunctionalTestBaseClass"; - private const string Format = "Test class '{0}' should extend BaseTestClass or FunctionalTestBaseClass."; + private const string Format = "Test class '{0}' should extend BaseTestClass or FunctionalTestBaseClass"; private const string Description = "Test classes should be derived from BaseTestClass or FunctionalTestBaseClass."; @@ -52,7 +52,7 @@ private sealed class AnalyzerImpl private readonly INamedTypeSymbol _factAttribute; private readonly INamedTypeSymbol _baseTestClass; private readonly INamedTypeSymbol _ITbaseTestClass; - private readonly ConcurrentDictionary _knownTestAttributes = new ConcurrentDictionary(); + private readonly ConcurrentDictionary _knownTestAttributes = new ConcurrentDictionary(SymbolEqualityComparer.Default); public AnalyzerImpl(Compilation compilation, INamedTypeSymbol factAttribute) { @@ -89,8 +89,8 @@ private bool ExtendsBaseTestClass(INamedTypeSymbol namedType) for (var current = namedType; current is object; current = current.BaseType) { - if (Equals(current, _baseTestClass) || - Equals(current, _ITbaseTestClass)) + if (SymbolEqualityComparer.Default.Equals(current, _baseTestClass) || + SymbolEqualityComparer.Default.Equals(current, _ITbaseTestClass)) return true; } diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendAnalyzer.cs index 605f71184c..9ec7e80ae0 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendAnalyzer.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendAnalyzer.cs @@ -17,10 +17,10 @@ public sealed class BestFriendAnalyzer : DiagnosticAnalyzer private const string Category = "Access"; internal const string DiagnosticId = "MSML_NoBestFriendInternal"; - private const string Title = 
"Cross-assembly internal access requires referenced item to have " + AttributeName + " attribute."; + private const string Title = "Cross-assembly internal access requires referenced item to have " + AttributeName + " attribute"; private const string Format = "Access of '{0}' is a cross assembly internal " + "reference, and the declaring assembly wants these accesses to be on something " + - "with the attribute " + AttributeName + "."; + "with the attribute " + AttributeName; private const string Description = "The identifier indicated is defined as an internal member of an assembly that has the " + AssemblyAttributeName + " assembly-level attribute set. Even with friend access to that " + @@ -60,7 +60,7 @@ private void AnalyzeCore(SemanticModelAnalysisContext context, string attributeN return; var myAssembly = comp.Assembly; - var assemblyHasAttrMap = new Dictionary(); + var assemblyHasAttrMap = new Dictionary(SymbolEqualityComparer.Default); int count = 0; foreach (var node in model.SyntaxTree.GetRoot().DescendantNodes(n => !n.IsKind(SyntaxKind.UsingDirective))) @@ -78,7 +78,7 @@ private void AnalyzeCore(SemanticModelAnalysisContext context, string attributeN if (symbol == null) continue; var symbolAssembly = symbol.ContainingAssembly; - if (Equals(symbolAssembly, myAssembly)) + if (SymbolEqualityComparer.Default.Equals(symbolAssembly, myAssembly)) continue; switch (symbol.DeclaredAccessibility) { @@ -96,12 +96,12 @@ private void AnalyzeCore(SemanticModelAnalysisContext context, string attributeN // It's the first of seeing the assembly containing symbol. A key-value pair is added into assemblyHasAttrMap to // indicate if that assembly includes an attribute WantsToBeBestFriends. If an assembly has WantsToBeBestFriends then // its associated value would be true. 
- assemblyWantsBestFriends = symbolAssembly.GetAttributes().Any(a => Equals(a.AttributeClass, wantsToBeBestFriendsAttributeType)); + assemblyWantsBestFriends = symbolAssembly.GetAttributes().Any(a => SymbolEqualityComparer.Default.Equals(a.AttributeClass, wantsToBeBestFriendsAttributeType)); assemblyHasAttrMap[symbolAssembly] = assemblyWantsBestFriends; } if (!assemblyWantsBestFriends) continue; - if (symbol.GetAttributes().Any(a => Equals(a.AttributeClass, bestFriendAttributeType))) + if (symbol.GetAttributes().Any(a => SymbolEqualityComparer.Default.Equals(a.AttributeClass, bestFriendAttributeType))) { // You're not just a friend, you're my best friend! continue; diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendOnPublicDeclarationsAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendOnPublicDeclarationsAnalyzer.cs index 8b1821013a..d44815c091 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendOnPublicDeclarationsAnalyzer.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/BestFriendOnPublicDeclarationsAnalyzer.cs @@ -16,8 +16,8 @@ public sealed class BestFriendOnPublicDeclarationsAnalyzer : DiagnosticAnalyzer private const string Category = "Access"; internal const string DiagnosticId = "MSML_BestFriendOnPublicDeclaration"; - private const string Title = "Public declarations should not have " + AttributeName + " attribute."; - private const string Format = "The " + AttributeName + " should not be applied to publicly visible members."; + private const string Title = "Public declarations should not have " + AttributeName + " attribute"; + private const string Format = "The " + AttributeName + " should not be applied to publicly visible members"; private const string Description = "The " + AttributeName + " attribute is not valid on public identifiers."; @@ -59,7 +59,7 @@ private void AnalyzeCore(SymbolAnalysisContext context, INamedTypeSymbol attribu if (context.Symbol.DeclaredAccessibility != 
Accessibility.Public) return; - var attribute = context.Symbol.GetAttributes().FirstOrDefault(a => Equals(a.AttributeClass, attributeType)); + var attribute = context.Symbol.GetAttributes().FirstOrDefault(a => SymbolEqualityComparer.Default.Equals(a.AttributeClass, attributeType)); if (attribute == null) return; diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/INamedTypeSymbolExtensions.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/INamedTypeSymbolExtensions.cs index 38b414bc8b..2ad6272e92 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/INamedTypeSymbolExtensions.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/INamedTypeSymbolExtensions.cs @@ -23,7 +23,7 @@ private static bool ExtendsFactAttribute(INamedTypeSymbol namedType, INamedTypeS Debug.Assert(factAttribute is object); for (var current = namedType; current is object; current = current.BaseType) { - if (Equals(current, factAttribute)) + if (SymbolEqualityComparer.Default.Equals(current, factAttribute)) return true; } diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/Microsoft.ML.InternalCodeAnalyzer.csproj b/tools-local/Microsoft.ML.InternalCodeAnalyzer/Microsoft.ML.InternalCodeAnalyzer.csproj index 44b410a05e..5981ee82e0 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/Microsoft.ML.InternalCodeAnalyzer.csproj +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/Microsoft.ML.InternalCodeAnalyzer.csproj @@ -2,13 +2,14 @@ netstandard2.0 + + $(NoWarn);RS1036;RS1038 - - - - + + + diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/NameFixProvider.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/NameFixProvider.cs index 94a3b7307f..d0ed78aa50 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/NameFixProvider.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/NameFixProvider.cs @@ -31,7 +31,7 @@ public sealed class NameFixProvider : CodeFixProvider { private const string PrivateTitle = "Fix name"; - private static ImmutableArray _fixable = 
ImmutableArray.Create( + private static readonly ImmutableArray _fixable = ImmutableArray.Create( NameAnalyzer.PrivateFieldName.Id, NameAnalyzer.GeneralName.Id, ParameterVariableNameAnalyzer.Id, TypeParamNameAnalyzer.Id); @@ -111,7 +111,8 @@ private async Task RenameAsync(Document document, // Produce a new solution that has all references to that type renamed, including the declaration. var originalSolution = document.Project.Solution; var optionSet = originalSolution.Workspace.Options; - var newSolution = await Renamer.RenameSymbolAsync(document.Project.Solution, typeSymbol, newName, optionSet, cancellationToken).ConfigureAwait(false); + SymbolRenameOptions renameOptions = new SymbolRenameOptions(); + var newSolution = await Renamer.RenameSymbolAsync(document.Project.Solution, typeSymbol, renameOptions, newName, cancellationToken).ConfigureAwait(false); // Return the new solution with the now-uppercase type name. return newSolution; diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/RelaxTestNamingSuppressor.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/RelaxTestNamingSuppressor.cs index cdd4d6bd2b..3f74d7ea12 100644 --- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/RelaxTestNamingSuppressor.cs +++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/RelaxTestNamingSuppressor.cs @@ -28,7 +28,7 @@ public override void ReportSuppressions(SuppressionAnalysisContext context) return; } - var knownTestAttributes = new ConcurrentDictionary(); + var knownTestAttributes = new ConcurrentDictionary(SymbolEqualityComparer.Default); foreach (var diagnostic in context.ReportedDiagnostics) {