序
本文主要研究一下Spring AI的PgVectorStore
示例
pom.xml
<!-- Spring AI starter used in this article to auto-configure PgVectorStore. -->
<!-- No version element is given; presumably it is managed by a BOM or dependencyManagement (confirm). -->
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
</dependency>
pgvector
# Run the pgvector/pgvector:pg16 image interactively as a container named "postgres".
# --rm removes the container when it exits; -p publishes port 5432 on the host.
docker run -it --rm \
  --name postgres \
  -p 5432:5432 \
  -e POSTGRES_USER=postgres \
  -e POSTGRES_PASSWORD=postgres \
  pgvector/pgvector:pg16
配置
spring:
  # JDBC connection used by the JdbcTemplate that backs the vector store.
  datasource:
    name: pgvector
    driverClassName: org.postgresql.Driver
    url: jdbc:postgresql://localhost:5432/postgres?currentSchema=public&connectTimeout=60&socketTimeout=60
    username: postgres
    password: postgres
  ai:
    vectorstore:
      # Selects the pgvector auto-configuration.
      type: pgvector
      pgvector:
        # Run the extension/schema/table/index DDL at startup.
        initialize-schema: true
        index-type: HNSW
        distance-type: COSINE_DISTANCE
        # Size of the embedding column; must match the embedding model in use.
        dimensions: 1024
        max-document-batch-size: 10000
        schema-name: public
        table-name: vector_store
设置initialize-schema为true,默认会执行如下初始化脚本:
-- Extensions used by the store; uuid-ossp provides uuid_generate_v4() for the id default.
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS hstore;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- NOTE(review): with `dimensions: 1024` configured, the embedding column is
-- presumably created as vector(1024) rather than vector(1536) - confirm.
CREATE TABLE IF NOT EXISTS vector_store (
    id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
    content text,
    metadata json,
    embedding vector(1536) -- 1536 is the default embedding dimension
);

CREATE INDEX ON vector_store USING HNSW (embedding vector_cosine_ops);
脚本源码:
org/springframework/ai/vectorstore/pgvector/PgVectorStore.java
/**
 * Validates and, when enabled, creates the database objects backing this store.
 * <p>
 * Sequence: log the target table and schema; run schema validation when
 * {@code schemaValidation} is set; return early unless {@code initializeSchema}
 * is set; otherwise create the extensions, the schema, the table and, unless the
 * index method is {@code NONE}, the vector index.
 * <p>
 * NOTE(review): validation is invoked before the CREATE TABLE below; how the
 * validator treats a table that does not exist yet is not visible here - confirm.
 */
public void afterPropertiesSet() {
logger.info("Initializing PGVectorStore schema for table: {} in schema: {}", this.getVectorTableName(),
this.getSchemaName());
logger.info("vectorTableValidationsEnabled {}", this.schemaValidation);
// Validation is independent of initializeSchema: it runs whenever it is enabled.
if (this.schemaValidation) {
this.schemaValidator.validateTableSchema(this.getSchemaName(), this.getVectorTableName());
}
// Nothing below runs unless schema initialization was requested.
if (!this.initializeSchema) {
logger.debug("Skipping the schema initialization for the table: {}", this.getFullyQualifiedTableName());
return;
}
// Enable the vector and hstore extensions, plus uuid-ossp when ids are UUIDs.
this.jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector");
this.jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS hstore");
if (this.idType == PgIdType.UUID) {
this.jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\"");
}
// NOTE(review): schema and table names are formatted directly into the DDL strings
// below, so they should only ever come from trusted configuration.
this.jdbcTemplate.execute(String.format("CREATE SCHEMA IF NOT EXISTS %s", this.getSchemaName()));
// Optionally drop the existing table so it is recreated from scratch (destroys stored data).
if (this.removeExistingVectorStoreTable) {
this.jdbcTemplate.execute(String.format("DROP TABLE IF EXISTS %s", this.getFullyQualifiedTableName()));
}
// Create the table: id column type from getColumnTypeName(), vector size from embeddingDimensions().
this.jdbcTemplate.execute(String.format("""
CREATE TABLE IF NOT EXISTS %s (
id %s PRIMARY KEY,
content text,
metadata json,
embedding vector(%d)
)
""", this.getFullyQualifiedTableName(), this.getColumnTypeName(), this.embeddingDimensions()));
// Create the vector index with the configured method and the operator class of the distance type.
if (this.createIndexMethod != PgIndexType.NONE) {
this.jdbcTemplate.execute(String.format("""
CREATE INDEX IF NOT EXISTS %s ON %s USING %s (embedding %s)
""", this.getVectorIndexName(), this.getFullyQualifiedTableName(), this.createIndexMethod,
this.getDistanceType().index));
}
}
代码
/**
 * Adds three sample documents to the PgVectorStore, then runs a similarity
 * search for "Spring" (topK = 5) and logs the returned documents as JSON.
 */
@Test
public void testAddAndSearch() {
    List<Document> documents = List.of(
            new Document("Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!", Map.of("meta1", "meta1")),
            new Document("The World is Big and Salvation Lurks Around the Corner"),
            new Document("You walk forward facing the past and you turn back toward the future.", Map.of("meta2", "meta2")));

    // Add the documents to the PGVector store
    this.pgVectorStore.add(documents);

    // Retrieve documents similar to a query
    SearchRequest request = SearchRequest.builder().query("Spring").topK(5).build();
    List<Document> results = this.pgVectorStore.similaritySearch(request);
    log.info("results:{}", JSON.toJSONString(results));
}
输出如下:
results:[{"contentFormatter":{"excludedEmbedMetadataKeys":[],"excludedInferenceMetadataKeys":[],"metadataSeparator":"\n","metadataTemplate":"{key}: {value}","textTemplate":"{metadata_string}\n\n{content}"},"formattedContent":"distance: 0.43509135\nmeta1: meta1\n\nSpring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!","id":"9dbce9af-0451-4bdb-8f03-1f8b8c4d696f","metadata":{"distance":0.43509135,"meta1":"meta1"},"score":0.5649086534976959,"text":"Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!"},{"contentFormatter":{"$ref":"$[0].contentFormatter"},"formattedContent":"distance: 0.57093126\n\nThe World is Big and Salvation Lurks Around the Corner","id":"92a45683-11fc-48b7-8676-dcca3b518dd4","metadata":{"distance":0.57093126},"score":0.42906874418258667,"text":"The World is Big and Salvation Lurks Around the Corner"},{"contentFormatter":{"$ref":"$[0].contentFormatter"},"formattedContent":"distance: 0.5936024\nmeta2: meta2\n\nYou walk forward facing the past and you turn back toward the future.","id":"298f6565-bcc7-4cbc-8552-4c0e2d021dbf","metadata":{"distance":0.5936024,"meta2":"meta2"},"score":0.40639758110046387,"text":"You walk forward facing the past and you turn back toward the future."}]
源码
PgVectorStoreAutoConfiguration
org/springframework/ai/vectorstore/pgvector/autoconfigure/PgVectorStoreAutoConfiguration.java
/**
 * Auto-configuration that registers a {@link PgVectorStore} bean.
 * <p>
 * It is ordered after {@code JdbcTemplateAutoConfiguration}, requires
 * {@code PgVectorStore}, {@code DataSource} and {@code JdbcTemplate} on the
 * classpath, and applies when the {@code SpringAIVectorStoreTypes.TYPE} property
 * equals {@code SpringAIVectorStoreTypes.PGVECTOR} or is not set at all
 * ({@code matchIfMissing = true}).
 */
@AutoConfiguration(after = JdbcTemplateAutoConfiguration.class)
@ConditionalOnClass({ PgVectorStore.class, DataSource.class, JdbcTemplate.class })
@EnableConfigurationProperties(PgVectorStoreProperties.class)
@ConditionalOnProperty(name = SpringAIVectorStoreTypes.TYPE, havingValue = SpringAIVectorStoreTypes.PGVECTOR,
matchIfMissing = true)
public class PgVectorStoreAutoConfiguration {
/**
 * Fallback batching strategy, registered only when the application defines no
 * {@code BatchingStrategy} bean of its own.
 */
@Bean
@ConditionalOnMissingBean(BatchingStrategy.class)
BatchingStrategy pgVectorStoreBatchingStrategy() {
return new TokenCountBatchingStrategy();
}
/**
 * Builds the {@link PgVectorStore} from the bound {@link PgVectorStoreProperties}.
 * Skipped when a bean of this type already exists.
 * @param jdbcTemplate JDBC access passed to the store builder
 * @param embeddingModel embedding model passed to the store builder
 * @param properties bound {@code spring.ai.vectorstore.pgvector} settings
 * @param observationRegistry optional registry; {@code ObservationRegistry.NOOP} is used
 * when no unique bean is available
 * @param customObservationConvention optional convention; {@code null} when none is available
 * @param batchingStrategy strategy handed to the store builder
 * @return the configured store
 */
@Bean
@ConditionalOnMissingBean
public PgVectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel,
PgVectorStoreProperties properties, ObjectProvider<ObservationRegistry> observationRegistry,
ObjectProvider<VectorStoreObservationConvention> customObservationConvention,
BatchingStrategy batchingStrategy) {
var initializeSchema = properties.isInitializeSchema();
return PgVectorStore.builder(jdbcTemplate, embeddingModel)
.schemaName(properties.getSchemaName())
.idType(properties.getIdType())
.vectorTableName(properties.getTableName())
.vectorTableValidationsEnabled(properties.isSchemaValidation())
.dimensions(properties.getDimensions())
.distanceType(properties.getDistanceType())
.removeExistingVectorStoreTable(properties.isRemoveExistingVectorStoreTable())
.indexType(properties.getIndexType())
.initializeSchema(initializeSchema)
.observationRegistry(observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP))
.customObservationConvention(customObservationConvention.getIfAvailable(() -> null))
.batchingStrategy(batchingStrategy)
.maxDocumentBatchSize(properties.getMaxDocumentBatchSize())
.build();
}
}
PgVectorStoreAutoConfiguration在`spring.ai.vectorstore.type`为pgvector、或者未配置该属性(matchIfMissing = true)时会自动装配PgVectorStore,它依赖PgVectorStoreProperties及JdbcTemplateAutoConfiguration
PgVectorStoreProperties
org/springframework/ai/vectorstore/pgvector/autoconfigure/PgVectorStoreProperties.java
/**
 * Configuration properties bound under {@code spring.ai.vectorstore.pgvector}.
 * Extends {@code CommonVectorStoreProperties}; this excerpt shows only the fields
 * and their defaults.
 */
@ConfigurationProperties(PgVectorStoreProperties.CONFIG_PREFIX)
public class PgVectorStoreProperties extends CommonVectorStoreProperties {
// Prefix under which all properties of this class are bound.
public static final String CONFIG_PREFIX = "spring.ai.vectorstore.pgvector";
// Embedding dimension; defaults to a sentinel constant (presumably "not configured" - confirm).
private int dimensions = PgVectorStore.INVALID_EMBEDDING_DIMENSION;
// Index method for the embedding column; HNSW by default.
private PgIndexType indexType = PgIndexType.HNSW;
// Distance type for the index; cosine distance by default.
private PgDistanceType distanceType = PgDistanceType.COSINE_DISTANCE;
// Whether the existing vector table is removed first; off by default.
private boolean removeExistingVectorStoreTable = false;
// Dynamically generate table name in PgVectorStore to allow backward compatibility
private String tableName = PgVectorStore.DEFAULT_TABLE_NAME;
// Database schema holding the vector table.
private String schemaName = PgVectorStore.DEFAULT_SCHEMA_NAME;
// Type of the id column; UUID by default.
private PgVectorStore.PgIdType idType = PgVectorStore.PgIdType.UUID;
// Whether schema validation is enabled.
private boolean schemaValidation = PgVectorStore.DEFAULT_SCHEMA_VALIDATION;
// Maximum document batch size setting.
private int maxDocumentBatchSize = PgVectorStore.MAX_DOCUMENT_BATCH_SIZE;
// ...... (rest of the class omitted in this excerpt)
}
PgVectorStoreProperties继承了CommonVectorStoreProperties的initializeSchema配置,它提供了`spring.ai.vectorstore.pgvector`的配置,主要有dimensions、indexType、distanceType、removeExistingVectorStoreTable、tableName、schemaName、idType、schemaValidation、maxDocumentBatchSize这几个属性
JdbcTemplateAutoConfiguration
org/springframework/boot/autoconfigure/jdbc/JdbcTemplateAutoConfiguration.java
/**
 * Spring Boot auto-configuration for {@code JdbcTemplate}.
 * <p>
 * It is ordered after {@code DataSourceAutoConfiguration}, requires
 * {@code DataSource} and {@code JdbcTemplate} on the classpath and a single
 * candidate {@code DataSource} bean. The class body is empty; its effect comes
 * from the configuration classes listed in {@code @Import}.
 */
@AutoConfiguration(after = DataSourceAutoConfiguration.class)
@ConditionalOnClass({ DataSource.class, JdbcTemplate.class })
@ConditionalOnSingleCandidate(DataSource.class)
@EnableConfigurationProperties(JdbcProperties.class)
@Import({ DatabaseInitializationDependencyConfigurer.class, JdbcTemplateConfiguration.class,
NamedParameterJdbcTemplateConfiguration.class })
public class JdbcTemplateAutoConfiguration {
}
JdbcTemplateAutoConfiguration引入了DatabaseInitializationDependencyConfigurer、JdbcTemplateConfiguration、NamedParameterJdbcTemplateConfiguration
小结
Spring AI提供了spring-ai-starter-vector-store-pgvector用于自动装配PgVectorStore。除了`spring.ai.vectorstore.pgvector`的配置,还需要配置`spring.datasource`。
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。