7 changes: 4 additions & 3 deletions src/main/java/de/kherud/llama/ModelParameters.java
@@ -459,7 +459,10 @@ public ModelParameters setJsonSchema(String schema) {
      * Set pooling type for embeddings (default: model default if unspecified).
      */
     public ModelParameters setPoolingType(PoolingType type) {
-        parameters.put("--pooling", type.getArgValue());
+        if (type != PoolingType.UNSPECIFIED) {
+            // Don't set if unspecified, as it will use the model's default pooling type
+            parameters.put("--pooling", type.name().toLowerCase());
+        }
         return this;
     }

@@ -960,5 +963,3 @@ public ModelParameters enableJinja() {
     }
 
 }
-
-
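For context, a minimal usage sketch of the changed setter (not part of the diff): an explicit pooling type is forwarded to llama.cpp as a "--pooling" flag, while UNSPECIFIED now leaves the flag out so the model's own default pooling is used. PoolingType.MEAN and the model path below are assumptions for illustration; only setModel, enableEmbedding, and setPoolingType from this PR are relied on.

import de.kherud.llama.ModelParameters;
import de.kherud.llama.args.PoolingType;

public class PoolingTypeUsageSketch {

    public static void main(String[] args) {
        // Explicit pooling: the builder records "--pooling mean".
        // PoolingType.MEAN is assumed to exist; only UNSPECIFIED and RANK appear in this PR.
        ModelParameters explicitPooling = new ModelParameters()
                .setModel("models/codellama-7b.Q2_K.gguf")
                .enableEmbedding()
                .setPoolingType(PoolingType.MEAN);

        // Unspecified pooling: no "--pooling" flag is recorded, so the model's
        // default pooling type applies when the model is loaded.
        ModelParameters defaultPooling = new ModelParameters()
                .setModel("models/codellama-7b.Q2_K.gguf")
                .enableEmbedding()
                .setPoolingType(PoolingType.UNSPECIFIED);
    }
}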
43 changes: 43 additions & 0 deletions src/test/java/de/kherud/llama/LlamaEmbeddingsTest.java
@@ -0,0 +1,43 @@
package de.kherud.llama;

import de.kherud.llama.args.PoolingType;
import org.junit.*;

import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;

public class LlamaEmbeddingsTest {

    private static final String modelPath = "models/codellama-7b.Q2_K.gguf";
    private static LlamaModel model;

    @BeforeClass
    public static void setup() {
        // Print PID of the current process to attach with GDB
        // Remember to set 'echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope' to attach.
        RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
        System.out.println("PID: " + runtime.getName().split("@")[0]);
    }

    @After
    public void tearDownTest() {
        if (model != null) {
            model.close();
        }
    }

    @Test
    public void testEmbeddingTypes() {
        for (PoolingType type : PoolingType.values()) {
            System.out.println("Testing embedding with pooling type: " + type);
            if (type == PoolingType.RANK) {
                continue; // Only supported by reranking models
            }
            model = new LlamaModel(new ModelParameters()
                    .setModel(modelPath)
                    .setGpuLayers(99)
                    .enableEmbedding()
                    .setPoolingType(type));
            String text = "This is a test sentence for embedding.";
            float[] embedding = model.embed(text);
            Assert.assertEquals(4096, embedding.length);
            model.close();
        }
    }
}
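As a usage-oriented follow-up (not part of this PR): embeddings produced under an explicit pooling type can be compared with plain cosine similarity. This sketch relies only on the builder calls and embed(String) exercised in the test above; PoolingType.MEAN, the model path, and the sample sentences are assumptions.

import de.kherud.llama.LlamaModel;
import de.kherud.llama.ModelParameters;
import de.kherud.llama.args.PoolingType;

public class EmbeddingSimilaritySketch {

    public static void main(String[] args) {
        // Assumed model path and pooling type; embed(String) returns a float[] as in the test above.
        LlamaModel model = new LlamaModel(new ModelParameters()
                .setModel("models/codellama-7b.Q2_K.gguf")
                .enableEmbedding()
                .setPoolingType(PoolingType.MEAN));
        try {
            float[] a = model.embed("This is a test sentence for embedding.");
            float[] b = model.embed("This sentence is a test for embeddings.");
            System.out.printf("cosine similarity: %.4f%n", cosine(a, b));
        } finally {
            model.close();
        }
    }

    // Plain cosine similarity between two equal-length vectors.
    private static double cosine(float[] x, float[] y) {
        double dot = 0.0, normX = 0.0, normY = 0.0;
        for (int i = 0; i < x.length; i++) {
            dot += x[i] * y[i];
            normX += x[i] * x[i];
            normY += y[i] * y[i];
        }
        return dot / (Math.sqrt(normX) * Math.sqrt(normY));
    }
}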