From 346eaa88cff697db4ecf750b68e4baeac42f9dc8 Mon Sep 17 00:00:00 2001 From: Christopher Hertel Date: Sat, 28 Sep 2024 19:16:04 +0200 Subject: [PATCH] feat: add voyage embeddings model --- .env | 3 ++ README.md | 34 ++++++++++---- examples/embeddings-openai.php | 21 +++++++++ examples/embeddings-voyage.php | 20 ++++++++ src/Anthropic/Model/Claude.php | 10 ++-- .../{ClaudePlatform.php => Platform.php} | 2 +- src/Anthropic/Platform/Anthropic.php | 4 +- src/DocumentEmbedder.php | 2 +- src/EmbeddingModel.php | 19 -------- src/EmbeddingsModel.php | 23 ++++++++++ src/OpenAI/Model/Embeddings.php | 8 ++-- src/ToolBox/Tool/SimilaritySearch.php | 4 +- src/Voyage/Model/Voyage.php | 37 +++++++++++++++ src/Voyage/Model/Voyage/Version.php | 46 +++++++++++++++++++ src/Voyage/Platform.php | 15 ++++++ src/Voyage/Platform/Voyage.php | 28 +++++++++++ 16 files changed, 233 insertions(+), 43 deletions(-) create mode 100755 examples/embeddings-openai.php create mode 100755 examples/embeddings-voyage.php rename src/Anthropic/{ClaudePlatform.php => Platform.php} (89%) delete mode 100644 src/EmbeddingModel.php create mode 100644 src/EmbeddingsModel.php create mode 100644 src/Voyage/Model/Voyage.php create mode 100644 src/Voyage/Model/Voyage/Version.php create mode 100644 src/Voyage/Platform.php create mode 100644 src/Voyage/Platform/Voyage.php diff --git a/.env b/.env index 88d9102f..2a8b994a 100644 --- a/.env +++ b/.env @@ -6,6 +6,9 @@ OPENAI_API_KEY= # For using Claude on Anthropic ANTHROPIC_API_KEY= +# For using Voyage +VOYAGE_API_KEY= + # For using GPT on Azure AZURE_OPENAI_BASEURL= AZURE_OPENAI_DEPLOYMENT= diff --git a/README.md b/README.md index 1c8b4084..842eaba2 100644 --- a/README.md +++ b/README.md @@ -29,15 +29,17 @@ Currently supported models and platforms: | Vendor | Model | Platform | |----------------|------------------------|----------------------| | **OpenAI** | - GPT
- Embeddings | - OpenAI
- Azure | -| **Anthropic** | - Claude | - Anthropic | +| **Anthropic** | - Claude | - Anthropic | +| **Voyage** | - Voyage | - Voyage | Planned Models & Platforms (not implemented yet): -| Vendor | Model | Platform | -|----------------|------------------------|----------------------------------| -| **Anthropic** | - Voyage | - GPC
- AWS | -| **Google** | - Gemini
- Gemma | - GPC | -| **Meta** | - Llama | - Meta AI
- GPC
- Ollama | +| Vendor | Model | Platform | +|----------------|---------------------------|----------------------------------| +| **Anthropic** | - Claude | - GPC
- AWS | +| **Voyage** | - Voyage | - AWS | +| **Google** | - Gemini
- Gemma | - GPC | +| **Meta** | - Llama | - Meta AI
- GPC
- Ollama | | **Mistral AI** | - Mistral
- Codestral | - Mistral
- GPT
- Ollama | Supported Stores @@ -72,13 +74,13 @@ To run all examples, just use `make run-all-examples`. ### Chat Examples -1. Chat Example: OpenAI's GPT +1. OpenAI's GPT ```bash export OPENAI_API_KEY=sk-... php examples/chat-gpt-openai.php ``` -1. Chat Example: OpenAI's GPT With Azure +1. OpenAI's GPT with Azure ```bash export AZURE_OPENAI_BASEURL=... // e.g. your-resource.openai.azure.com export AZURE_OPENAI_DEPLOYMENT=... @@ -87,12 +89,26 @@ To run all examples, just use `make run-all-examples`. php examples/chat-gpt-azure.php ``` -1. Chat Example: Anthropic's Claude +1. Anthropic's Claude ```bash export ANTHROPIC_API_KEY=sk-... php examples/chat-claude-anthropic.php ``` +### Embeddings Examples + +1. OpenAI's Embeddings + ```bash + export OPENAI_API_KEY=sk-... + php examples/embeddings-openai.php + ``` + +1. Voyage's Embeddings + ```bash + export VOYAGE_API_KEY=sk-... + php examples/embeddings-voyage.php + ``` + ### Tool Examples 1. Simple Clock Tool diff --git a/examples/embeddings-openai.php b/examples/embeddings-openai.php new file mode 100755 index 00000000..ab13b7e0 --- /dev/null +++ b/examples/embeddings-openai.php @@ -0,0 +1,21 @@ +loadEnv(dirname(__DIR__).'/.env'); + +$platform = new OpenAI(HttpClient::create(), $_ENV['OPENAI_API_KEY']); +$embeddings = new Embeddings($platform, Version::textEmbedding3Small()); + +$vector = $embeddings->create(<<getDimensions().PHP_EOL; diff --git a/examples/embeddings-voyage.php b/examples/embeddings-voyage.php new file mode 100755 index 00000000..80eb5934 --- /dev/null +++ b/examples/embeddings-voyage.php @@ -0,0 +1,20 @@ +loadEnv(dirname(__DIR__).'/.env'); + +$platform = new VoyagePlatform(HttpClient::create(), $_ENV['VOYAGE_API_KEY']); +$embeddings = new Voyage($platform); + +$vector = $embeddings->create(<<getDimensions().PHP_EOL; diff --git a/src/Anthropic/Model/Claude.php b/src/Anthropic/Model/Claude.php index 52ef7cc2..55c3c4b9 100644 --- a/src/Anthropic/Model/Claude.php +++ b/src/Anthropic/Model/Claude.php @@ -4,8 
+4,8 @@ namespace PhpLlm\LlmChain\Anthropic\Model; -use PhpLlm\LlmChain\Anthropic\ClaudePlatform; use PhpLlm\LlmChain\Anthropic\Model\Claude\Version; +use PhpLlm\LlmChain\Anthropic\Platform; use PhpLlm\LlmChain\LanguageModel; use PhpLlm\LlmChain\Message\MessageBag; use PhpLlm\LlmChain\Response\Choice; @@ -14,10 +14,10 @@ final class Claude implements LanguageModel { /** - * @param array $options The default options for the model usage + * @param array $options The default options for the model usage */ public function __construct( - private readonly ClaudePlatform $platform, + private readonly Platform $platform, private ?Version $version = null, private readonly array $options = ['temperature' => 1.0, 'max_tokens' => 1000], ) { @@ -25,8 +25,8 @@ public function __construct( } /** - * @param array $options The options to be used for this specific call. - * Can overwrite default options. + * @param array $options The options to be used for this specific call. + * Can overwrite default options. 
*/ public function call(MessageBag $messages, array $options = []): Response { diff --git a/src/Anthropic/ClaudePlatform.php b/src/Anthropic/Platform.php similarity index 89% rename from src/Anthropic/ClaudePlatform.php rename to src/Anthropic/Platform.php index a6816bbe..fa97f329 100644 --- a/src/Anthropic/ClaudePlatform.php +++ b/src/Anthropic/Platform.php @@ -4,7 +4,7 @@ namespace PhpLlm\LlmChain\Anthropic; -interface ClaudePlatform +interface Platform { /** * @param array $body diff --git a/src/Anthropic/Platform/Anthropic.php b/src/Anthropic/Platform/Anthropic.php index c692581c..a0f155fe 100644 --- a/src/Anthropic/Platform/Anthropic.php +++ b/src/Anthropic/Platform/Anthropic.php @@ -4,10 +4,10 @@ namespace PhpLlm\LlmChain\Anthropic\Platform; -use PhpLlm\LlmChain\Anthropic\ClaudePlatform; +use PhpLlm\LlmChain\Anthropic\Platform; use Symfony\Contracts\HttpClient\HttpClientInterface; -final readonly class Anthropic implements ClaudePlatform +final readonly class Anthropic implements Platform { public function __construct( private HttpClientInterface $httpClient, diff --git a/src/DocumentEmbedder.php b/src/DocumentEmbedder.php index b7ff38a9..2f300d8e 100644 --- a/src/DocumentEmbedder.php +++ b/src/DocumentEmbedder.php @@ -10,7 +10,7 @@ final readonly class DocumentEmbedder { public function __construct( - private EmbeddingModel $embeddings, + private EmbeddingsModel $embeddings, private StoreInterface $store, ) { } diff --git a/src/EmbeddingModel.php b/src/EmbeddingModel.php deleted file mode 100644 index 405533a9..00000000 --- a/src/EmbeddingModel.php +++ /dev/null @@ -1,19 +0,0 @@ - $texts - * - * @return list - */ - public function multiCreate(array $texts): array; -} diff --git a/src/EmbeddingsModel.php b/src/EmbeddingsModel.php new file mode 100644 index 00000000..859e37a2 --- /dev/null +++ b/src/EmbeddingsModel.php @@ -0,0 +1,23 @@ + $options + */ + public function create(string $text, array $options = []): Vector; + + /** + * @param list $texts + * @param 
array $options + * + * @return Vector[] + */ + public function multiCreate(array $texts, array $options = []): array; +} diff --git a/src/OpenAI/Model/Embeddings.php b/src/OpenAI/Model/Embeddings.php index 2c5669a8..249c83c8 100644 --- a/src/OpenAI/Model/Embeddings.php +++ b/src/OpenAI/Model/Embeddings.php @@ -5,11 +5,11 @@ namespace PhpLlm\LlmChain\OpenAI\Model; use PhpLlm\LlmChain\Document\Vector; -use PhpLlm\LlmChain\EmbeddingModel; +use PhpLlm\LlmChain\EmbeddingsModel; use PhpLlm\LlmChain\OpenAI\Model\Embeddings\Version; use PhpLlm\LlmChain\OpenAI\Platform; -final class Embeddings implements EmbeddingModel +final class Embeddings implements EmbeddingsModel { public function __construct( private readonly Platform $platform, @@ -18,14 +18,14 @@ public function __construct( $this->version ??= Version::textEmbedding3Small(); } - public function create(string $text): Vector + public function create(string $text, array $options = []): Vector { $response = $this->platform->request('embeddings', $this->createBody($text)); return $this->extractVector($response); } - public function multiCreate(array $texts): array + public function multiCreate(array $texts, array $options = []): array { $bodies = array_map([$this, 'createBody'], $texts); diff --git a/src/ToolBox/Tool/SimilaritySearch.php b/src/ToolBox/Tool/SimilaritySearch.php index 70eba369..949fa8aa 100644 --- a/src/ToolBox/Tool/SimilaritySearch.php +++ b/src/ToolBox/Tool/SimilaritySearch.php @@ -5,7 +5,7 @@ namespace PhpLlm\LlmChain\ToolBox\Tool; use PhpLlm\LlmChain\Document\Document; -use PhpLlm\LlmChain\EmbeddingModel; +use PhpLlm\LlmChain\EmbeddingsModel; use PhpLlm\LlmChain\Store\VectorStoreInterface; use PhpLlm\LlmChain\ToolBox\AsTool; @@ -18,7 +18,7 @@ final class SimilaritySearch public array $usedDocuments = []; public function __construct( - private readonly EmbeddingModel $embeddings, + private readonly EmbeddingsModel $embeddings, private readonly VectorStoreInterface $vectorStore, ) { } diff --git 
a/src/Voyage/Model/Voyage.php b/src/Voyage/Model/Voyage.php new file mode 100644 index 00000000..98f694fa --- /dev/null +++ b/src/Voyage/Model/Voyage.php @@ -0,0 +1,37 @@ +version ??= Version::v3(); + } + + public function create(string $text, array $options = []): Vector + { + $vectors = $this->multiCreate([$text], $options); + + return $vectors[0]; + } + + public function multiCreate(array $texts, array $options = []): array + { + $response = $this->platform->request(array_merge($options, [ + 'model' => $this->version->name, + 'input' => $texts, + ])); + + return array_map(fn (array $data) => new Vector($data['embedding']), $response['data']); + } +} diff --git a/src/Voyage/Model/Voyage/Version.php b/src/Voyage/Model/Voyage/Version.php new file mode 100644 index 00000000..77ffd2f3 --- /dev/null +++ b/src/Voyage/Model/Voyage/Version.php @@ -0,0 +1,46 @@ + $body + * + * @return array + */ + public function request(array $body): array; +} diff --git a/src/Voyage/Platform/Voyage.php b/src/Voyage/Platform/Voyage.php new file mode 100644 index 00000000..6f03058e --- /dev/null +++ b/src/Voyage/Platform/Voyage.php @@ -0,0 +1,28 @@ +httpClient->request('POST', 'https://api.voyageai.com/v1/embeddings', [ + 'headers' => ['Content-Type' => 'application/json'], + 'auth_bearer' => $this->apiKey, + 'json' => $body, + ]); + + return $response->toArray(); + } +}