2

BerkeleyDB 在爬虫、搜索领域用得比较多。总体来讲,它是一个嵌入式的 KV(键值)数据库,功能强大,能支持数百 TB 的存储。本文主要讲述怎样在 Java 中使用它(Berkeley DB Java Edition)。

添加依赖

 <!-- Berkeley DB Java Edition (artifact com.sleepycat:je) -->
        <dependency>
            <groupId>com.sleepycat</groupId>
            <artifactId>je</artifactId>
            <!-- Alternative release, kept commented out for reference -->
            <!--<version>5.0.73</version>-->
            <version>6.4.9</version>
        </dependency>

如果使用 5 以上的版本,则需要额外添加 Oracle 的 Maven 仓库:

<repositories>
        <!-- Oracle's Maven repository, used to resolve the Berkeley DB JE artifact.
             The URL uses HTTPS because Maven 3.8.1 and later block plain-HTTP
             repositories by default. -->
        <repository>
            <id>oracleReleases</id>
            <name>Oracle Released Java Packages</name>
            <url>https://download.oracle.com/maven</url>
            <layout>default</layout>
        </repository>
    </repositories>

使用方式

在 Java 中主要有两种使用方式:一种是基于注解的方式(DPL,Direct Persistence Layer),另一种是直接使用底层的原始 API。本文主要使用注解方式。

领域模型

/**
 * An entity class describing a person, stored under its {@code ssn} primary key.
 *
 * <p>Three fields are declared as secondary keys:
 * {@code parentSsn} (many-to-one, related to another {@code Person}),
 * {@code emailAddresses} (one-to-many) and {@code employerIds}
 * (many-to-many, related to {@code Employer}).
 *
 * <p>NOTE(review): the relationship and delete-action constants
 * ({@code MANY_TO_ONE}, {@code NULLIFY}, ...) are used unqualified, so the
 * enclosing file presumably has the matching static imports - confirm.
 */
@Entity
public class Person {

    /** Primary key of the entity. */
    @PrimaryKey
    String ssn;

    /** Plain (non-key) fields. */
    String name;
    Address address;

    /** Primary key of the parent {@code Person}; may be null when there is no parent. */
    @SecondaryKey(relate = MANY_TO_ONE, relatedEntity = Person.class)
    String parentSsn;

    /** Email addresses of this person; each element is a secondary key value. */
    @SecondaryKey(relate = ONE_TO_MANY)
    Set<String> emailAddresses = new HashSet<String>();

    /**
     * Primary keys of the employers of this person. With {@code NULLIFY}, deleting
     * an employer removes its id from this set.
     */
    @SecondaryKey(relate = MANY_TO_MANY,
            relatedEntity = Employer.class,
            onRelatedEntityDelete = NULLIFY)
    Set<Long> employerIds = new HashSet<Long>();

    /**
     * Creates a person.
     *
     * @param name      the person's name
     * @param ssn       the primary key
     * @param parentSsn the primary key of the parent person, or null
     */
    public Person(String name, String ssn, String parentSsn) {
        this.name = name;
        this.ssn = ssn;
        this.parentSsn = parentSsn;
    }

    /** No-argument constructor, present for deserialization only. */
    private Person() {
    } // For deserialization

    /** @return the primary key */
    public String getSsn() {
        return ssn;
    }

    /** @param ssn the primary key to set */
    public void setSsn(String ssn) {
        this.ssn = ssn;
    }

    /** @return the person's name */
    public String getName() {
        return name;
    }

    /** @param name the name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the embedded address, as currently assigned */
    public Address getAddress() {
        return address;
    }

    /** @param address the embedded address to set */
    public void setAddress(Address address) {
        this.address = address;
    }

    /** @return the parent's primary key, as currently assigned */
    public String getParentSsn() {
        return parentSsn;
    }

    /** @param parentSsn the parent's primary key to set */
    public void setParentSsn(String parentSsn) {
        this.parentSsn = parentSsn;
    }

    /** @return the internal (mutable) set of email addresses, not a copy */
    public Set<String> getEmailAddresses() {
        return emailAddresses;
    }

    /** @param emailAddresses the set that replaces the current email addresses */
    public void setEmailAddresses(Set<String> emailAddresses) {
        this.emailAddresses = emailAddresses;
    }

    /** @return the internal (mutable) set of employer ids, not a copy */
    public Set<Long> getEmployerIds() {
        return employerIds;
    }

    /** @param employerIds the set that replaces the current employer ids */
    public void setEmployerIds(Set<Long> employerIds) {
        this.employerIds = employerIds;
    }
}
  • 内嵌对象

/**
 * A persistent (non-entity) class holding a postal address. It has no primary
 * key of its own and is used as a field type by other classes.
 */
@Persistent
public class Address {
    String street;
    String city;
    String state;
    int zipCode;

    /** No-argument constructor, needed for deserialization. */
    public Address() {
    } // For deserialization

    /** @return the street */
    public String getStreet() {
        return street;
    }

    /** @param street the street to set */
    public void setStreet(String street) {
        this.street = street;
    }

    /** @return the city */
    public String getCity() {
        return city;
    }

    /** @param city the city to set */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the state */
    public String getState() {
        return state;
    }

    /** @param state the state to set */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the zip code as an int */
    public int getZipCode() {
        return zipCode;
    }

    /** @param zipCode the zip code to set */
    public void setZipCode(int zipCode) {
        this.zipCode = zipCode;
    }
}
  • 关联

/**
 * An entity class describing an employer. The primary key is drawn from the
 * sequence named "ID", and {@code name} is a one-to-one secondary key.
 *
 * <p>NOTE(review): {@code ONE_TO_ONE} is used unqualified, so the enclosing file
 * presumably has the matching static import - confirm.
 */
@Entity
public class Employer {

    /** Primary key, assigned from the "ID" sequence. */
    @PrimaryKey(sequence = "ID")
    private long id;

    /** Employer name, indexed as a one-to-one secondary key. */
    @SecondaryKey(relate = ONE_TO_ONE)
    private String name;

    /** Embedded address; not a key. */
    private Address address;

    /**
     * Creates an employer with the given name; the id is left at its default.
     *
     * @param name the employer name
     */
    public Employer(String name) {
        this.name = name;
    }

    /** No-argument constructor, present for deserialization only. */
    private Employer() {
    } // For deserialization

    /** @return the primary key */
    public long getId() {
        return id;
    }

    /** @param id the primary key to set */
    public void setId(long id) {
        this.id = id;
    }

    /** @return the employer name */
    public String getName() {
        return name;
    }

    /** @param name the employer name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the embedded address, as currently assigned */
    public Address getAddress() {
        return address;
    }

    /** @param address the embedded address to set */
    public void setAddress(Address address) {
        this.address = address;
    }
}

初始化及关闭操作(下面代码中用到的 Inventory、Vendor 实体类本文未列出)

    /* Environment and entity store shared by every test; opened in prepare(), closed in close(). */
    private Environment myEnv;
    private EntityStore store;

    /*
     * Inventory / Vendor accessors.
     * NOTE(review): the Inventory and Vendor classes are not defined in the
     * visible source - confirm where they come from.
     */
    private PrimaryIndex<String, Inventory> inventoryBySku;
    private PrimaryIndex<String, Vendor> vendorByName;
    private SecondaryIndex<String, String, Inventory> inventoryByName;

    /* Employer accessors */
    PrimaryIndex<Long, Employer> employerById;
    SecondaryIndex<String, Long, Employer> employerByName;

    /* Person accessors */
    PrimaryIndex<String, Person> personBySsn;
    SecondaryIndex<String, String, Person> personByParentSsn;
    SecondaryIndex<String, String, Person> personByEmailAddresses;
    SecondaryIndex<Long, String, Person> personByEmployerIds;


    /* Environment home: the "bdb" directory under the current working directory. */
    private File envHome = new File(System.getProperty("user.dir") + File.separator + "bdb");
    /* When false, the environment and store are opened writable and may be created. */
    private boolean readOnly = false;

    /**
     * Opens the environment and entity store under {@code envHome} and obtains
     * every primary and secondary index used by the tests.
     *
     * <p>The environment home directory is created (including missing parents)
     * when it does not exist yet.
     *
     * @throws IllegalStateException if the environment home directory does not
     *                               exist and cannot be created
     */
    @Before
    public void prepare() {
        EnvironmentConfig myEnvConfig = new EnvironmentConfig();
        StoreConfig storeConfig = new StoreConfig();

        myEnvConfig.setReadOnly(readOnly);
        storeConfig.setReadOnly(readOnly);

        // If the environment is opened for write, then we want to be
        // able to create the environment and entity store if
        // they do not exist.
        myEnvConfig.setAllowCreate(!readOnly);
        storeConfig.setAllowCreate(!readOnly);

        // Make sure the environment home exists before opening it.
        System.out.println(envHome.getAbsolutePath());
        // mkdirs() also creates missing parents; the second isDirectory() check
        // tolerates the directory being created concurrently.
        if (!envHome.isDirectory() && !envHome.mkdirs() && !envHome.isDirectory()) {
            throw new IllegalStateException(
                    "Cannot create environment home directory: " + envHome.getAbsolutePath());
        }

        // Open the environment and entity store
        myEnv = new Environment(envHome, myEnvConfig);
        store = new EntityStore(myEnv, "EntityStore", storeConfig);

        // Primary key for Inventory classes
        inventoryBySku = store.getPrimaryIndex(String.class, Inventory.class);
        // Secondary key for Inventory classes
        // Last field in the getSecondaryIndex() method must be
        // the name of a class member; in this case, an Inventory.class
        // data member.
        inventoryByName = store.getSecondaryIndex(inventoryBySku, String.class, "itemName");
        // Primary key for Vendor class
        vendorByName = store.getPrimaryIndex(String.class, Vendor.class);

        // Employer indexes: by sequence-assigned id and by name.
        employerById = store.getPrimaryIndex(Long.class, Employer.class);
        employerByName = store.getSecondaryIndex(employerById, String.class, "name");

        // Person indexes: by ssn plus one secondary index per @SecondaryKey field.
        personBySsn = store.getPrimaryIndex(String.class, Person.class);
        personByParentSsn = store.getSecondaryIndex(personBySsn, String.class, "parentSsn");
        personByEmailAddresses = store.getSecondaryIndex(personBySsn, String.class, "emailAddresses");
        personByEmployerIds = store.getSecondaryIndex(personBySsn, Long.class, "employerIds");

    }

    /**
     * Closes the entity store and then the environment after each test.
     * A failure while closing either one is printed and does not stop the
     * other from being closed.
     */
    @After
    public void close() {
        // The store goes first, then the environment it was opened in.
        try {
            if (store != null) {
                store.close();
            }
        } catch (DatabaseException storeFailure) {
            storeFailure.printStackTrace();
        }

        try {
            if (myEnv != null) {
                myEnv.close();
            }
        } catch (DatabaseException envFailure) {
            envFailure.printStackTrace();
        }
    }

增删改查

添加数据

@Test
    /**
     * Loads the classpath resources "vendors.txt" and "inventory.txt" and stores
     * one Vendor / Inventory entity per line. Fields within a line are separated
     * by '#'.
     *
     * @throws IOException if a resource cannot be read
     */
    public void putData() throws IOException {
        ClassLoader loader = this.getClass().getClassLoader();

        List<String> vendorRows = Resources.readLines(loader.getResource("vendors.txt"), Charsets.UTF_8);
        for (String row : vendorRows) {
            String[] sArray = row.split("#");
            Vendor theVendor = new Vendor();
            theVendor.setVendorName(sArray[0]);
            theVendor.setAddress(sArray[1]);
            theVendor.setCity(sArray[2]);
            theVendor.setState(sArray[3]);
            theVendor.setZipcode(sArray[4]);
            theVendor.setBusinessPhoneNumber(sArray[5]);
            theVendor.setRepName(sArray[6]);
            theVendor.setRepPhoneNumber(sArray[7]);
            // Put it in the store. No explicit transaction is passed, and
            // prepare() does not enable transactions on the store, so each
            // put is an independent write.
            vendorByName.put(theVendor);
        }

        // The inventoryBySku field initialised in prepare() is reused here
        // rather than fetching the same primary index a second time.
        List<String> inventoryRows = Resources.readLines(loader.getResource("inventory.txt"), Charsets.UTF_8);
        for (String row : inventoryRows) {
            String[] sArray = row.split("#");
            Inventory theInventory = new Inventory();
            theInventory.setItemName(sArray[0]);
            theInventory.setSku(sArray[1]);
            // parseFloat/parseInt yield the primitives directly, avoiding the
            // deprecated boxing constructors.
            theInventory.setVendorPrice(Float.parseFloat(sArray[2]));
            theInventory.setVendorInventory(Integer.parseInt(sArray[3]));
            theInventory.setCategory(sArray[4]);
            theInventory.setVendor(sArray[5]);
            // Put it in the store. Note that this causes our secondary key
            // to be automatically updated for us.
            inventoryBySku.put(theInventory);
        }
    }

查询数据

    /**
     * Prints every Inventory entity whose "itemName" secondary key equals
     * "Oranges".
     */
    @Test
    public void getInventoryData() {
        // Narrow the secondary index to a single key value, then walk the matches.
        EntityCursor<Inventory> oranges = inventoryByName.subIndex("Oranges").entities();
        try {
            Inventory current;
            while ((current = oranges.next()) != null) {
                System.out.println(ToStringBuilder.reflectionToString(current));
            }
        } finally {
            // The cursor must be closed whether or not the walk completed.
            oranges.close();
        }
    }

    /**
     * Prints every Inventory entity in the store by walking the primary index.
     */
    @Test
    public void getAllInventory() {
        EntityCursor<Inventory> all = inventoryBySku.entities();
        try {
            // Explicit first()/next() traversal; next() returns null past the last entity.
            for (Inventory entry = all.first(); entry != null; entry = all.next()) {
                System.out.println(ToStringBuilder.reflectionToString(entry));
            }
        } finally {
            all.close();
        }
    }

更新

如果没有开启允许重复记录,那么对已存在的主键执行 put 就是更新(覆盖原有记录)。

@Test
    /**
     * Stores an Inventory entity, reads it back, changes its vendor, stores it
     * again under the same primary key and prints the entity before and after.
     */
    public void update() {
        final String sku = "apple-for-update";

        Inventory apples = new Inventory();
        apples.setItemName("Apples");
        apples.setSku(sku);
        apples.setVendorPrice(1.20f);
        apples.setVendorInventory(728);
        apples.setCategory("fruits");
        apples.setVendor("Off the Vine");
        inventoryBySku.put(apples);

        // State as first stored.
        Inventory stored = inventoryBySku.get(sku);
        System.out.println(ToStringBuilder.reflectionToString(stored));

        // A second put with the same primary key replaces the record.
        stored.setVendor("vendor update");
        inventoryBySku.put(stored);

        Inventory reloaded = inventoryBySku.get(sku);
        System.out.println(ToStringBuilder.reflectionToString(reloaded));
    }

删除

    /**
     * Stores an Inventory entity, deletes it by primary key and verifies that
     * the delete reported success and the key can no longer be read.
     */
    @Test
    public void delete() {
        final String sku = "apple-for-update";

        Inventory apples = new Inventory();
        apples.setItemName("Apples");
        apples.setSku(sku);
        apples.setVendorPrice(1.20f);
        apples.setVendorInventory(728);
        apples.setCategory("fruits");
        apples.setVendor("Off the Vine");
        inventoryBySku.put(apples);

        Inventory stored = inventoryBySku.get(sku);
        System.out.println(ToStringBuilder.reflectionToString(stored));

        // delete() reports whether a record with this key existed.
        Assert.assertTrue(inventoryBySku.delete(sku));

        Inventory afterDelete = inventoryBySku.get(sku);
        Assert.assertNull(afterDelete);

    }

统计

@Test
    /**
     * Prints the number of Employer entities in the primary index.
     *
     * <p>The count is taken from the index itself. {@code EntityCursor.count()}
     * is not suitable: it reports the number of values for the key at the
     * cursor position, which on a primary index is at most one regardless of
     * how many entities are stored.
     */
    public void count(){
        // Prints 0 for an empty index.
        long count = employerById.count();
        System.out.println("employee count:" + count);
    }

级联及主键自增情况

 @Test
    /**
     * Walks through related entities and sequence-assigned primary keys:
     * stores a parent and two children, lists the children through the
     * parentSsn sub-index, links the parent to two employers and two email
     * addresses, and checks that deleting an employer removes its id from the
     * person's employerIds.
     */
    public void sequencePk() {
        final String bobSsn = "111-11-1111";

        /*
         * The parent is stored first, then the two children that name it
         * through parentSsn.
         */
        personBySsn.put(new Person("Bob Smith", bobSsn, null));
        personBySsn.put(new Person("Mary Smith", "333-33-3333", bobSsn));
        personBySsn.put(new Person("Jack Smith", "222-22-2222", bobSsn));

        /* List Bob's children via the sub-index for his key. */
        EntityCursor<Person> kids = personByParentSsn.subIndex(bobSsn).entities();
        try {
            Person kid;
            while ((kid = kids.next()) != null) {
                System.out.println(kid.getSsn() + ' ' + kid.getName());
            }
        } finally {
            kids.close();
        }

        /* Fetch Bob through the primary index. */
        Person bob = personBySsn.get(bobSsn);
        Assert.assertNotNull(bob);

        /* Two employers, created on demand; their ids come from a sequence. */
        Employer gizmoInc = findOrCreateEmployer("Gizmo Inc");
        Employer gadgetInc = findOrCreateEmployer("Gadget Inc");
        long gizmoId = gizmoInc.getId();
        long gadgetId = gadgetInc.getId();

        /* Bob gets two jobs and two email addresses. */
        Set<Long> jobs = bob.getEmployerIds();
        jobs.add(gizmoId);
        jobs.add(gadgetId);

        Set<String> emails = bob.getEmailAddresses();
        emails.add("bob@bob.com");
        emails.add("bob@gmail.com");

        /* Persist the modified record. */
        personBySsn.put(bob);

        /* Either email address now leads to Bob. */
        Assert.assertNotNull(personByEmailAddresses.get("bob@bob.com"));
        Assert.assertNotNull(personByEmailAddresses.get("bob@gmail.com"));

        /* Bob shows up in the employee sub-index of both employers. */
        EntityIndex<String, Person> gizmoStaff = personByEmployerIds.subIndex(gizmoId);
        Assert.assertTrue(gizmoStaff.contains(bobSsn));
        EntityIndex<String, Person> gadgetStaff = personByEmployerIds.subIndex(gadgetId);
        Assert.assertTrue(gadgetStaff.contains(bobSsn));

        /*
         * Deleting an employer: onRelatedEntityDelete=NULLIFY on employerIds
         * causes the deleted id to be removed from Bob's employerIds.
         */
        employerById.delete(gizmoId);
        bob = personBySsn.get(bobSsn);
        Assert.assertNotNull(bob);
        Assert.assertFalse(bob.getEmployerIds().contains(gizmoId));
    }

    /** Returns the employer with the given name, storing a new one first when none exists. */
    private Employer findOrCreateEmployer(String employerName) {
        Employer found = employerByName.get(employerName);
        if (found != null) {
            return found;
        }
        Employer created = new Employer(employerName);
        employerById.put(created);
        return created;
    }

    /**
     * Prints every Employer entity using a cursor opened with an explicit
     * CursorConfig and no transaction.
     */
    @Test
    public void cursor() {
        // Ask for read-uncommitted isolation on this cursor.
        // NOTE(review): per the original comment, this setting is ignored when
        // the store is not opened with uncommitted read support - confirm.
        CursorConfig readUncommitted = new CursorConfig();
        readUncommitted.setReadUncommitted(true);

        EntityCursor<Employer> all = employerById.entities(null, readUncommitted);
        try {
            Employer current;
            while ((current = all.next()) != null) {
                System.out.println(ToStringBuilder.reflectionToString(current));
            }
        } finally {
            all.close();
        }
    }

本工程github

参考


codecraft
11.9k 声望2k 粉丝

当一个代码的工匠回首往事时,不因虚度年华而悔恨,也不因碌碌无为而羞愧,这样,当他老的时候,可以很自豪告诉世人,我曾经将代码注入生命去打造互联网的浪潮之巅,那是个很疯狂的时代,我在一波波的浪潮上留下...


« 上一篇
Java8的新特性
下一篇 »
lucene简单入门