序
本文主要研究一下Spring AI Alibaba的MySQLDocumentReader
MySQLDocumentReader
community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLDocumentReader.java
public class MySQLDocumentReader implements DocumentReader {
private final MySQLResource mysqlResource;
public MySQLDocumentReader(MySQLResource mysqlResource) {
this.mysqlResource = mysqlResource;
}
@Override
public List<Document> get() {
List<Document> documents = new ArrayList<>();
try {
// Register MySQL JDBC driver
Class.forName("com.mysql.cj.jdbc.Driver");
// Create database connection
try (Connection connection = createConnection()) {
documents = executeQueryAndProcessResults(connection);
}
}
catch (ClassNotFoundException e) {
throw new RuntimeException("MySQL JDBC driver not found", e);
}
catch (SQLException e) {
throw new RuntimeException("Error executing MySQL query: " + e.getMessage(), e);
}
return documents;
}
/**
* Create database connection
*/
private Connection createConnection() throws SQLException {
return DriverManager.getConnection(mysqlResource.getJdbcUrl(), mysqlResource.getUsername(),
mysqlResource.getPassword());
}
/**
* Execute query and process results
*/
private List<Document> executeQueryAndProcessResults(Connection connection) throws SQLException {
List<Document> documents = new ArrayList<>();
try (Statement statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(mysqlResource.getQuery())) {
List<String> columnNames = getColumnNames(resultSet.getMetaData());
while (resultSet.next()) {
Map<String, Object> rowData = extractRowData(resultSet, columnNames);
String content = buildContent(rowData);
Map<String, Object> metadata = buildMetadata(rowData);
documents.add(new Document(content, metadata));
}
}
return documents;
}
/**
* Get list of column names
*/
private List<String> getColumnNames(ResultSetMetaData metaData) throws SQLException {
List<String> columnNames = new ArrayList<>();
int columnCount = metaData.getColumnCount();
for (int i = 1; i <= columnCount; i++) {
columnNames.add(metaData.getColumnName(i));
}
return columnNames;
}
/**
* Extract row data
*/
private Map<String, Object> extractRowData(ResultSet resultSet, List<String> columnNames) throws SQLException {
Map<String, Object> rowData = new HashMap<>();
for (int i = 0; i < columnNames.size(); i++) {
String columnName = columnNames.get(i);
Object value = resultSet.getObject(i + 1);
rowData.put(columnName, value);
}
return rowData;
}
/**
* Build document content
*/
private String buildContent(Map<String, Object> rowData) {
StringBuilder contentBuilder = new StringBuilder();
List<String> contentColumns = mysqlResource.getTextColumns();
if (contentColumns == null || contentColumns.isEmpty()) {
// If no content columns specified, use all columns
for (Map.Entry<String, Object> entry : rowData.entrySet()) {
appendColumnContent(contentBuilder, entry.getKey(), entry.getValue());
}
}
else {
// Only use specified content columns
for (String column : contentColumns) {
if (rowData.containsKey(column)) {
appendColumnContent(contentBuilder, column, rowData.get(column));
}
}
}
return contentBuilder.toString().trim();
}
/**
* Append column content
*/
private void appendColumnContent(StringBuilder builder, String column, Object value) {
builder.append(column).append(": ").append(value).append("\n");
}
/**
* Build metadata
*/
private Map<String, Object> buildMetadata(Map<String, Object> rowData) {
Map<String, Object> metadata = new HashMap<>();
metadata.put(MySQLResource.SOURCE, mysqlResource.getJdbcUrl());
List<String> metadataColumns = mysqlResource.getMetadataColumns();
if (metadataColumns != null) {
for (String column : metadataColumns) {
if (rowData.containsKey(column)) {
metadata.put(column, rowData.get(column));
}
}
}
return metadata;
}
}
MySQLDocumentReader要求构造器传入MySQLResource,其get方法通过jdbc执行指定的query,将字段名与值通过:
拼接在一起作为内容,根据metadataColumns来将自定字段的值作为metadata
MySQLResource
community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLResource.java
public class MySQLResource {
// MySQL connection properties
private final String host; // MySQL server hostname or IP address
private final int port; // MySQL server port number, default is 3306
private final String database; // Name of the database to connect to
private final String username; // MySQL user name for authentication
private final String password; // MySQL password for authentication
// Query settings
private final String query; // SQL query to execute
private final List<String> contentColumns; // Columns to include in document content
private final List<String> metadataColumns; // Columns to include in document metadata
public static final String SOURCE = "mysql";
// Default values for MySQL connection
public static final String DEFAULT_HOST = "127.0.0.1";
public static final int DEFAULT_PORT = 3306;
public static final String DEFAULT_USERNAME = "root";
public static final String DEFAULT_PASSWORD = "root";
public MySQLResource(String host, int port, String database, String username, String password, String query,
List<String> contentColumns, List<String> metadataColumns) {
this.host = host;
this.port = port;
this.database = database;
this.username = username;
this.password = password;
this.query = query;
this.contentColumns = contentColumns;
this.metadataColumns = metadataColumns;
}
/**
* Constructor with default host and port
* @param database Database name
* @param username MySQL username
* @param password MySQL password
* @param query SQL query to execute
* @param contentColumns Columns to include in document content
* @param metadataColumns Columns to include in document metadata
*/
public MySQLResource(String database, String username, String password, String query, List<String> contentColumns,
List<String> metadataColumns) {
this(DEFAULT_HOST, DEFAULT_PORT, database, username, password, query, contentColumns, metadataColumns);
}
/**
* Constructor with all default connection parameters
* @param database Database name
* @param query SQL query to execute
* @param contentColumns Columns to include in document content
* @param metadataColumns Columns to include in document metadata
*/
public MySQLResource(String database, String query, List<String> contentColumns, List<String> metadataColumns) {
this(DEFAULT_HOST, DEFAULT_PORT, database, DEFAULT_USERNAME, DEFAULT_PASSWORD, query, contentColumns,
metadataColumns);
}
//......
}
MySQLResource定义了port、database、username、password、query、contentColumns、metadataColumns等属性
示例
@EnabledIfSystemProperty(named = "mysql.host", matches = ".+")
public class MySQLDocumentReaderTest {
private MySQLResource mysqlResource;
private MySQLDocumentReader reader;
@BeforeEach
void setUp() {
// Read MySQL connection information from system properties
String host = System.getProperty("mysql.host", "localhost");
int port = Integer.parseInt(System.getProperty("mysql.port", "3306"));
String database = System.getProperty("mysql.database", "mysql"); // Use default
// mysql
// database
String username = System.getProperty("mysql.username", "root");
String password = System.getProperty("mysql.password", "root");
String query = System.getProperty("mysql.query", "SELECT * FROM user LIMIT 10;"); // Use
// user
// table
// in
// mysql
// database
// Read content and metadata columns from system properties
String contentColumnsStr = System.getProperty("mysql.content.columns", "User,Host");
String metadataColumnsStr = System.getProperty("mysql.metadata.columns", "User,Host");
List<String> contentColumns = Arrays.asList(contentColumnsStr.split(","));
List<String> metadataColumns = Arrays.asList(metadataColumnsStr.split(","));
// Setup test MySQL resource
mysqlResource = new MySQLResource(host, port, database, username, password, query, contentColumns,
metadataColumns);
reader = new MySQLDocumentReader(mysqlResource);
}
@Test
void testGetDocuments() {
// This test requires a running MySQL instance with test data
// You may need to modify the connection details and query using system
// properties:
// -Dmysql.host=your_host -Dmysql.port=your_port -Dmysql.database=your_db
// -Dmysql.username=your_user -Dmysql.password=your_pass
List<Document> documents = reader.get();
// Basic assertions
assertNotNull(documents);
assertFalse(documents.isEmpty());
// Test first document
Document firstDoc = documents.get(0);
assertNotNull(firstDoc);
// Test document content
String content = firstDoc.getText();
assertNotNull(content);
}
@Test
void testInvalidConnection() {
// Test with invalid credentials
MySQLResource invalidResource = new MySQLResource("invalid_host", 3306, "invalid_db", "invalid_user",
"invalid_pass", "SELECT * FROM test_table", null, null);
MySQLDocumentReader invalidReader = new MySQLDocumentReader(invalidResource);
// Should throw RuntimeException
assertThrows(RuntimeException.class, invalidReader::get);
}
}
小结
spring-ai-alibaba-starter-document-reader-mysql提供了MySQLDocumentReader用于读取mysql的表内容和metadata到document,一条记录转为一个document,其中contentColumns用于指定哪些字段作为document的内容,多个字段用\n
分隔,metadataColumns用于指定哪个字段的值作为metadata。
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。