本文主要研究一下Spring AI Alibaba的MySQLDocumentReader

MySQLDocumentReader

community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLDocumentReader.java

public class MySQLDocumentReader implements DocumentReader {

    private final MySQLResource mysqlResource;

    public MySQLDocumentReader(MySQLResource mysqlResource) {
        this.mysqlResource = mysqlResource;
    }

    @Override
    public List<Document> get() {
        List<Document> documents = new ArrayList<>();
        try {
            // Register MySQL JDBC driver
            Class.forName("com.mysql.cj.jdbc.Driver");

            // Create database connection
            try (Connection connection = createConnection()) {
                documents = executeQueryAndProcessResults(connection);
            }
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeException("MySQL JDBC driver not found", e);
        }
        catch (SQLException e) {
            throw new RuntimeException("Error executing MySQL query: " + e.getMessage(), e);
        }
        return documents;
    }

    /**
     * Create database connection
     */
    private Connection createConnection() throws SQLException {
        return DriverManager.getConnection(mysqlResource.getJdbcUrl(), mysqlResource.getUsername(),
                mysqlResource.getPassword());
    }

    /**
     * Execute query and process results
     */
    private List<Document> executeQueryAndProcessResults(Connection connection) throws SQLException {
        List<Document> documents = new ArrayList<>();
        try (Statement statement = connection.createStatement();
                ResultSet resultSet = statement.executeQuery(mysqlResource.getQuery())) {

            List<String> columnNames = getColumnNames(resultSet.getMetaData());
            while (resultSet.next()) {
                Map<String, Object> rowData = extractRowData(resultSet, columnNames);
                String content = buildContent(rowData);
                Map<String, Object> metadata = buildMetadata(rowData);
                documents.add(new Document(content, metadata));
            }
        }
        return documents;
    }

    /**
     * Get list of column names
     */
    private List<String> getColumnNames(ResultSetMetaData metaData) throws SQLException {
        List<String> columnNames = new ArrayList<>();
        int columnCount = metaData.getColumnCount();
        for (int i = 1; i <= columnCount; i++) {
            columnNames.add(metaData.getColumnName(i));
        }
        return columnNames;
    }

    /**
     * Extract row data
     */
    private Map<String, Object> extractRowData(ResultSet resultSet, List<String> columnNames) throws SQLException {
        Map<String, Object> rowData = new HashMap<>();
        for (int i = 0; i < columnNames.size(); i++) {
            String columnName = columnNames.get(i);
            Object value = resultSet.getObject(i + 1);
            rowData.put(columnName, value);
        }
        return rowData;
    }

    /**
     * Build document content
     */
    private String buildContent(Map<String, Object> rowData) {
        StringBuilder contentBuilder = new StringBuilder();
        List<String> contentColumns = mysqlResource.getTextColumns();

        if (contentColumns == null || contentColumns.isEmpty()) {
            // If no content columns specified, use all columns
            for (Map.Entry<String, Object> entry : rowData.entrySet()) {
                appendColumnContent(contentBuilder, entry.getKey(), entry.getValue());
            }
        }
        else {
            // Only use specified content columns
            for (String column : contentColumns) {
                if (rowData.containsKey(column)) {
                    appendColumnContent(contentBuilder, column, rowData.get(column));
                }
            }
        }
        return contentBuilder.toString().trim();
    }

    /**
     * Append column content
     */
    private void appendColumnContent(StringBuilder builder, String column, Object value) {
        builder.append(column).append(": ").append(value).append("\n");
    }

    /**
     * Build metadata
     */
    private Map<String, Object> buildMetadata(Map<String, Object> rowData) {
        Map<String, Object> metadata = new HashMap<>();
        metadata.put(MySQLResource.SOURCE, mysqlResource.getJdbcUrl());

        List<String> metadataColumns = mysqlResource.getMetadataColumns();
        if (metadataColumns != null) {
            for (String column : metadataColumns) {
                if (rowData.containsKey(column)) {
                    metadata.put(column, rowData.get(column));
                }
            }
        }
        return metadata;
    }

}
MySQLDocumentReader要求构造器传入MySQLResource,其get方法通过jdbc执行指定的query,将字段名与值通过拼接在一起作为内容,根据metadataColumns来将自定字段的值作为metadata

MySQLResource

community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLResource.java

public class MySQLResource {

    // MySQL connection properties
    private final String host; // MySQL server hostname or IP address

    private final int port; // MySQL server port number, default is 3306

    private final String database; // Name of the database to connect to

    private final String username; // MySQL user name for authentication

    private final String password; // MySQL password for authentication

    // Query settings
    private final String query; // SQL query to execute

    private final List<String> contentColumns; // Columns to include in document content

    private final List<String> metadataColumns; // Columns to include in document metadata

    public static final String SOURCE = "mysql";

    // Default values for MySQL connection
    public static final String DEFAULT_HOST = "127.0.0.1";

    public static final int DEFAULT_PORT = 3306;

    public static final String DEFAULT_USERNAME = "root";

    public static final String DEFAULT_PASSWORD = "root";

    public MySQLResource(String host, int port, String database, String username, String password, String query,
            List<String> contentColumns, List<String> metadataColumns) {
        this.host = host;
        this.port = port;
        this.database = database;
        this.username = username;
        this.password = password;
        this.query = query;
        this.contentColumns = contentColumns;
        this.metadataColumns = metadataColumns;
    }

    /**
     * Constructor with default host and port
     * @param database Database name
     * @param username MySQL username
     * @param password MySQL password
     * @param query SQL query to execute
     * @param contentColumns Columns to include in document content
     * @param metadataColumns Columns to include in document metadata
     */
    public MySQLResource(String database, String username, String password, String query, List<String> contentColumns,
            List<String> metadataColumns) {
        this(DEFAULT_HOST, DEFAULT_PORT, database, username, password, query, contentColumns, metadataColumns);
    }

    /**
     * Constructor with all default connection parameters
     * @param database Database name
     * @param query SQL query to execute
     * @param contentColumns Columns to include in document content
     * @param metadataColumns Columns to include in document metadata
     */
    public MySQLResource(String database, String query, List<String> contentColumns, List<String> metadataColumns) {
        this(DEFAULT_HOST, DEFAULT_PORT, database, DEFAULT_USERNAME, DEFAULT_PASSWORD, query, contentColumns,
                metadataColumns);
    }

    //......
}    
MySQLResource定义了port、database、username、password、query、contentColumns、metadataColumns等属性

示例

@EnabledIfSystemProperty(named = "mysql.host", matches = ".+")
public class MySQLDocumentReaderTest {

    private MySQLResource mysqlResource;

    private MySQLDocumentReader reader;

    @BeforeEach
    void setUp() {
        // Read MySQL connection information from system properties
        String host = System.getProperty("mysql.host", "localhost");
        int port = Integer.parseInt(System.getProperty("mysql.port", "3306"));
        String database = System.getProperty("mysql.database", "mysql"); // Use default
                                                                            // mysql
                                                                            // database
        String username = System.getProperty("mysql.username", "root");
        String password = System.getProperty("mysql.password", "root");
        String query = System.getProperty("mysql.query", "SELECT * FROM user LIMIT 10;"); // Use
                                                                                            // user
                                                                                            // table
                                                                                            // in
                                                                                            // mysql
                                                                                            // database

        // Read content and metadata columns from system properties
        String contentColumnsStr = System.getProperty("mysql.content.columns", "User,Host");
        String metadataColumnsStr = System.getProperty("mysql.metadata.columns", "User,Host");

        List<String> contentColumns = Arrays.asList(contentColumnsStr.split(","));
        List<String> metadataColumns = Arrays.asList(metadataColumnsStr.split(","));

        // Setup test MySQL resource
        mysqlResource = new MySQLResource(host, port, database, username, password, query, contentColumns,
                metadataColumns);

        reader = new MySQLDocumentReader(mysqlResource);
    }

    @Test
    void testGetDocuments() {
        // This test requires a running MySQL instance with test data
        // You may need to modify the connection details and query using system
        // properties:
        // -Dmysql.host=your_host -Dmysql.port=your_port -Dmysql.database=your_db
        // -Dmysql.username=your_user -Dmysql.password=your_pass

        List<Document> documents = reader.get();

        // Basic assertions
        assertNotNull(documents);
        assertFalse(documents.isEmpty());

        // Test first document
        Document firstDoc = documents.get(0);
        assertNotNull(firstDoc);

        // Test document content
        String content = firstDoc.getText();
        assertNotNull(content);
    }

    @Test
    void testInvalidConnection() {
        // Test with invalid credentials
        MySQLResource invalidResource = new MySQLResource("invalid_host", 3306, "invalid_db", "invalid_user",
                "invalid_pass", "SELECT * FROM test_table", null, null);

        MySQLDocumentReader invalidReader = new MySQLDocumentReader(invalidResource);

        // Should throw RuntimeException
        assertThrows(RuntimeException.class, invalidReader::get);
    }

}

小结

spring-ai-alibaba-starter-document-reader-mysql提供了MySQLDocumentReader用于读取mysql的表内容和metadata到document,一条记录转为一个document,其中contentColumns用于指定哪些字段作为document的内容,多个字段用\n分隔,metadataColumns用于指定哪个字段的值作为metadata。

doc


codecraft
11.9k 声望2k 粉丝

当一个代码的工匠回首往事时,不因虚度年华而悔恨,也不因碌碌无为而羞愧,这样,当他老的时候,可以很自豪告诉世人,我曾经将代码注入生命去打造互联网的浪潮之巅,那是个很疯狂的时代,我在一波波的浪潮上留下...