序
本文主要研究一下如何使用langchain4j+Jlama调用大语言模型。
步骤
pom.xml
<properties>
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<logback.version>1.5.6</logback.version>
<jlama.version>0.8.3</jlama.version>
</properties>
<dependencies>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-jlama</artifactId>
<version>1.0.0-beta1</version>
<exclusions>
<exclusion>
<groupId>com.github.tjake</groupId>
<artifactId>jlama-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Add native jlama bindings -->
<dependency>
<groupId>com.github.tjake</groupId>
<artifactId>jlama-native</artifactId>
<classifier>${os.detected.classifier}</classifier>
<version>${jlama.version}</version>
</dependency>
<dependency>
<groupId>com.github.tjake</groupId>
<artifactId>jlama-core</artifactId>
<version>${jlama.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<version>1.0.0-beta1</version>
</dependency>
</dependencies>
<build>
<extensions>
<extension>
<groupId>kr.motd.maven</groupId>
<artifactId>os-maven-plugin</artifactId>
<version>1.7.1</version>
</extension>
</extensions>
</build>
下载模型文件
- 手工下载
比如http://hf-mirror.com/tjake/Llama-3.2-1B-Instruct-JQ4/tree/main,可以通过git下载下来,需要git支持lfs文件下载
- 自动下载
jlama会自动下载到~/.jlama/models/目录下,文件夹命名为tjake_Llama-3.2-1B-Instruct-JQ4
但是默认是使用https://huggingface.co,可能访问不到,需要替换为https://hf-mirror.com
运行
/**
 * Minimal example of running a local LLM through the langchain4j Jlama binding.
 *
 * <p>Run with JVM options: {@code --add-modules=jdk.incubator.vector
 * --enable-native-access=ALL-UNNAMED}.
 */
public class JlamaChatModelExamples {

    static class Simple_Prompt {

        public static void main(String[] args) {
            // Build a Jlama-backed chat model. If the model file is not present
            // locally, Jlama downloads it automatically (by default from
            // huggingface.co; see the article for using a mirror).
            ChatLanguageModel chatModel = JlamaChatModel.builder()
                    .modelName("tjake/Llama-3.2-1B-Instruct-JQ4")
                    .temperature(0.3f)
                    .build();

            // System message sets the assistant persona; user message is the task.
            SystemMessage persona = SystemMessage.from("You are helpful chatbot who is a java expert.");
            UserMessage task = UserMessage.from("Write a java program to print hello world.");

            ChatResponse response = chatModel.chat(persona, task);

            System.out.println("\n" + response.aiMessage().text() + "\n");
        }
    }
}
idea运行要加上--add-modules=jdk.incubator.vector --enable-native-access=ALL-UNNAMED
不过example的模型不支持ARM,报错如下
WARNING: Using incubator modules: jdk.incubator.vector
WARN c.g.t.j.t.o.TensorOperationsProvider - Native operations not available. Consider adding 'com.github.tjake:jlama-native' to the classpath
INFO c.g.t.j.t.o.TensorOperationsProvider - Using Panama Vector Operations (OffHeap)
INFO c.g.tjake.jlama.model.AbstractModel - Model type = Q4, Working memory type = F32, Quantized memory type = I8
Exception in thread "main" java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: ARM_128
at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:502)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:486)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:540)
at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:567)
at java.base/java.util.concurrent.ForkJoinTask.join(ForkJoinTask.java:653)
at com.github.tjake.jlama.util.PhysicalCoreExecutor.execute(PhysicalCoreExecutor.java:55)
at com.github.tjake.jlama.math.VectorMath.pchunk(VectorMath.java:56)
at com.github.tjake.jlama.model.AbstractModel.sample(AbstractModel.java:422)
at com.github.tjake.jlama.model.AbstractModel.generate(AbstractModel.java:550)
at dev.langchain4j.model.jlama.JlamaChatModel.generate(JlamaChatModel.java:117)
at dev.langchain4j.model.jlama.JlamaChatModel.generate(JlamaChatModel.java:70)
at dev.langchain4j.model.chat.ChatLanguageModel.doChat(ChatLanguageModel.java:114)
at dev.langchain4j.model.chat.ChatLanguageModel.chat(ChatLanguageModel.java:58)
at dev.langchain4j.model.chat.ChatLanguageModel.chat(ChatLanguageModel.java:86)
at JlamaChatModelExamples$Simple_Prompt.main(JlamaChatModelExamples.java:18)
Caused by: java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: ARM_128
at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:502)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:486)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:540)
at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:567)
at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:670)
at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:160)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfInt.evaluateParallel(ForEachOps.java:189)
at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
at java.base/java.util.stream.IntPipeline.forEach(IntPipeline.java:463)
at java.base/java.util.stream.IntPipeline$Head.forEach(IntPipeline.java:620)
at com.github.tjake.jlama.math.VectorMath.lambda$pchunk$2(VectorMath.java:59)
at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(ForkJoinTask.java:1403)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:387)
at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1312)
at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1843)
at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1808)
at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:188)
Caused by: java.lang.UnsupportedOperationException: ARM_128
at com.github.tjake.jlama.tensor.operations.PanamaTensorOperations.batchDotProduct(PanamaTensorOperations.java:125)
at com.github.tjake.jlama.tensor.operations.TensorOperations.dotProductChunk(TensorOperations.java:76)
at com.github.tjake.jlama.model.AbstractModel.lambda$sample$4(AbstractModel.java:424)
at com.github.tjake.jlama.math.VectorMath.lambda$pchunk$1(VectorMath.java:60)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfInt.accept(ForEachOps.java:205)
at java.base/java.util.stream.Streams$RangeIntSpliterator.forEachRemaining(Streams.java:104)
at java.base/java.util.Spliterator$OfInt.forEachRemaining(Spliterator.java:712)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:291)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
... 5 more
小结
Jlama是一个纯Java实现的大语言模型推理引擎,它要求Java 20+版本,并使用了Panama Vector API来实现快速推理。
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。