本文主要研究一下如何使用langchain4j+Jlama调用大语言模型。

步骤

pom.xml

    <properties>
        <maven.compiler.source>21</maven.compiler.source>
        <maven.compiler.target>21</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <logback.version>1.5.6</logback.version>
        <jlama.version>0.8.3</jlama.version>
    </properties>

     <dependencies>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-jlama</artifactId>
            <version>1.0.0-beta1</version>
            <exclusions>
                <exclusion>
                    <groupId>com.github.tjake</groupId>
                    <artifactId>jlama-core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Add native jlama bindings -->
        <dependency>
            <groupId>com.github.tjake</groupId>
            <artifactId>jlama-native</artifactId>
            <classifier>${os.detected.classifier}</classifier>
            <version>${jlama.version}</version>
        </dependency>

        <dependency>
            <groupId>com.github.tjake</groupId>
            <artifactId>jlama-core</artifactId>
            <version>${jlama.version}</version>
        </dependency>

        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j</artifactId>
            <version>1.0.0-beta1</version>
        </dependency>

    </dependencies> 

    <build>
        <extensions>
            <extension>
                <groupId>kr.motd.maven</groupId>
                <artifactId>os-maven-plugin</artifactId>
                <version>1.7.1</version>
            </extension>
        </extensions>
    </build>

下载模型文件

运行

/**
 * Minimal example of invoking a local LLM through langchain4j's Jlama integration.
 */
public class JlamaChatModelExamples {

    static class Simple_Prompt {

        public static void main(String[] args) {

            // Build a Jlama-backed chat model; the model is downloaded/loaded by name.
            // temperature 0.3 keeps the output fairly deterministic.
            ChatLanguageModel chatModel = JlamaChatModel.builder()
                    .modelName("tjake/Llama-3.2-1B-Instruct-JQ4")
                    .temperature(0.3f)
                    .build();

            // Send one system + one user message and block until the reply arrives.
            ChatResponse response = chatModel.chat(
                    SystemMessage.from("You are helpful chatbot who is a java expert."),
                    UserMessage.from("Write a java program to print hello world.")
            );

            // Print only the assistant's text, padded with blank lines for readability.
            System.out.println("\n" + response.aiMessage().text() + "\n");
        }
    }
}
在IDEA中运行时,JVM参数需要加上 --add-modules=jdk.incubator.vector --enable-native-access=ALL-UNNAMED

不过该示例在ARM架构上运行时报错:从日志看,jlama-native本地库未能加载,回退到的Panama向量实现不支持ARM_128(推测与本地库的classifier未匹配到ARM平台有关),报错如下

WARNING: Using incubator modules: jdk.incubator.vector
WARN  c.g.t.j.t.o.TensorOperationsProvider - Native operations not available. Consider adding 'com.github.tjake:jlama-native' to the classpath
INFO  c.g.t.j.t.o.TensorOperationsProvider - Using Panama Vector Operations (OffHeap)
INFO  c.g.tjake.jlama.model.AbstractModel - Model type = Q4, Working memory type = F32, Quantized memory type = I8
Exception in thread "main" java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: ARM_128
    at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62)
    at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:502)
    at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:486)
    at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:540)
    at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:567)
    at java.base/java.util.concurrent.ForkJoinTask.join(ForkJoinTask.java:653)
    at com.github.tjake.jlama.util.PhysicalCoreExecutor.execute(PhysicalCoreExecutor.java:55)
    at com.github.tjake.jlama.math.VectorMath.pchunk(VectorMath.java:56)
    at com.github.tjake.jlama.model.AbstractModel.sample(AbstractModel.java:422)
    at com.github.tjake.jlama.model.AbstractModel.generate(AbstractModel.java:550)
    at dev.langchain4j.model.jlama.JlamaChatModel.generate(JlamaChatModel.java:117)
    at dev.langchain4j.model.jlama.JlamaChatModel.generate(JlamaChatModel.java:70)
    at dev.langchain4j.model.chat.ChatLanguageModel.doChat(ChatLanguageModel.java:114)
    at dev.langchain4j.model.chat.ChatLanguageModel.chat(ChatLanguageModel.java:58)
    at dev.langchain4j.model.chat.ChatLanguageModel.chat(ChatLanguageModel.java:86)
    at JlamaChatModelExamples$Simple_Prompt.main(JlamaChatModelExamples.java:18)
Caused by: java.lang.UnsupportedOperationException: java.lang.UnsupportedOperationException: ARM_128
    at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62)
    at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:502)
    at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:486)
    at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:540)
    at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:567)
    at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:670)
    at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:160)
    at java.base/java.util.stream.ForEachOps$ForEachOp$OfInt.evaluateParallel(ForEachOps.java:189)
    at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
    at java.base/java.util.stream.IntPipeline.forEach(IntPipeline.java:463)
    at java.base/java.util.stream.IntPipeline$Head.forEach(IntPipeline.java:620)
    at com.github.tjake.jlama.math.VectorMath.lambda$pchunk$2(VectorMath.java:59)
    at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(ForkJoinTask.java:1403)
    at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:387)
    at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1312)
    at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1843)
    at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1808)
    at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:188)
Caused by: java.lang.UnsupportedOperationException: ARM_128
    at com.github.tjake.jlama.tensor.operations.PanamaTensorOperations.batchDotProduct(PanamaTensorOperations.java:125)
    at com.github.tjake.jlama.tensor.operations.TensorOperations.dotProductChunk(TensorOperations.java:76)
    at com.github.tjake.jlama.model.AbstractModel.lambda$sample$4(AbstractModel.java:424)
    at com.github.tjake.jlama.math.VectorMath.lambda$pchunk$1(VectorMath.java:60)
    at java.base/java.util.stream.ForEachOps$ForEachOp$OfInt.accept(ForEachOps.java:205)
    at java.base/java.util.stream.Streams$RangeIntSpliterator.forEachRemaining(Streams.java:104)
    at java.base/java.util.Spliterator$OfInt.forEachRemaining(Spliterator.java:712)
    at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
    at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:291)
    at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
    ... 5 more

小结

Jlama是一个Java类库,用于运行大语言模型。它基于Java 20+版本构建,使用了Panama Vector API来实现快速推理。

doc


codecraft
11.9k 声望2k 粉丝

当一个代码的工匠回首往事时,不因虚度年华而悔恨,也不因碌碌无为而羞愧,这样,当他老的时候,可以很自豪告诉世人,我曾经将代码注入生命去打造互联网的浪潮之巅,那是个很疯狂的时代,我在一波波的浪潮上留下...