前言

前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。本文开始进入 Python 项目,目标是完成医疗知识图谱的构建和问答机器人的代码实现。由于完整内容篇幅较长,本文主要介绍知识图谱的构建部分。
图片

环境

Anaconda3
Python3.8
Py2neo (新版) 

数据来源 (结构)

图片

编码

1.引入依赖

import json

from py2neo import Graph, Node

2.类的初始化 (连接 neo4j)

def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Store the data file location and open the Neo4j connection.

    Args:
        data_path: Path of the line-delimited JSON file consumed by ``read_data``.
        uri: Bolt URI handed to ``py2neo.Graph``.
        auth: ``(user, password)`` pair handed to ``py2neo.Graph``.

    The defaults reproduce the values used throughout this tutorial, so calling
    the constructor without arguments behaves exactly as before.
    NOTE(review): the default password is a tutorial value; pass ``auth``
    explicitly instead of relying on it outside a local sandbox.
    """
    self.data_path = data_path
    self.neo4j = Graph(uri, auth=auth)

3.读取数据

def read_data(self):
    """Parse the line-delimited JSON file at ``self.data_path`` into node and edge lists.

    Every non-blank line is decoded as one JSON object describing a disease.
    The keys ``name``, ``desc``, ``prevent``, ``cause``, ``symptom``, ``check``
    and ``drug_detail`` are read unconditionally (``KeyError`` when absent);
    every other key is optional.

    Returns:
        A 20-tuple, in this order (callers unpack it positionally):
        ``set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info, rels_symptom,
        rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
        rels_department, rels_category, rels_drug_producer``.
        ``rels_drug_producer`` appears twice (positions 17 and 20); this is
        kept because existing callers unpack exactly 20 values.
        ``disease_info`` is a list of per-disease property dicts; each
        ``rels_*`` entry is a two-element ``[start_name, end_name]`` list.
    """
    # Node names (de-duplicated into sets on return).
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []

    # One property dict per disease.
    disease_info = []

    # Edge lists.
    rels_symptom = []         # disease -> symptom
    rels_acompany = []        # disease -> complication
    rels_category = []        # disease -> department
    rels_department = []      # department -> department
    rels_commondrug = []      # disease -> commonly used drug
    rels_recommenddrug = []   # disease -> recommended drug
    rels_noteat = []          # disease -> food to avoid
    rels_doeat = []           # disease -> food that is fine to eat
    rels_recommendeat = []    # disease -> recommended food
    rels_check = []           # disease -> check item
    rels_drug_producer = []   # producer -> drug

    # Optional scalar attributes; missing ones default to ''.
    optional_attrs = ('get_prob', 'yibao_status', 'easy_get', 'get_way',
                      'cure_lasttime', 'cured_prob', 'cost_money')
    # Optional list-valued keys and the edge list / node list each one feeds.
    drug_keys = (('common_drug', rels_commondrug),
                 ('recommand_drug', rels_recommenddrug))
    food_keys = (('not_eat', rels_noteat),
                 ('do_eat', rels_doeat),
                 ('recommand_eat', rels_recommendeat))
    # This check name is kept as a node name but gets no edge.
    # NOTE(review): presumably skipped because its apostrophes break a
    # query that embeds names inside single quotes -- confirm.
    excluded_check = "血清5'-核苷酸酶(5'-NT)"

    # ``with`` closes the file even if a record fails to parse.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            # A blank (e.g. trailing) line is not a record; json.loads would raise on it.
            if not line.strip():
                continue
            data_json = json.loads(line)

            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {attr: data_json.get(attr, '') for attr in optional_attrs}
            disease_dict['cure_department'] = []
            disease_dict['name'] = disease
            disease_dict['desc'] = data_json['desc']
            disease_dict['prevent'] = data_json['prevent']
            disease_dict['cause'] = data_json['cause']
            disease_info.append(disease_dict)

            symptom = data_json['symptom']
            rels_symptom.extend([disease, symptom_i] for symptom_i in symptom)
            symptoms += symptom

            # Departments: one entry links the disease to it; two entries are
            # treated as a (large, small) pair.
            # NOTE(review): the disease is linked to the first entry and the
            # department edge is stored as [large, small] -- confirm that this
            # direction is what the "belongs_to" relation is meant to express.
            if "cure_department" in data_json:
                cure_department = data_json['cure_department']
                departments += cure_department
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large = cure_department[0]
                    small = cure_department[1]
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            # Complications are stored as symptom nodes.
            if 'acompany' in data_json:
                acompanys = data_json['acompany']
                rels_acompany.extend([disease, acompany] for acompany in acompanys)
                symptoms += acompanys

            # Each drug key is checked on its own: previously 'recommand_drug'
            # was read inside the 'common_drug' guard, which raised KeyError
            # when only 'common_drug' was present and ignored the recommended
            # drugs when only 'recommand_drug' was present.
            for key, rels in drug_keys:
                if key in data_json:
                    names = data_json[key]
                    rels.extend([disease, name] for name in names)
                    drugs += names

            for key, rels in food_keys:
                if key in data_json:
                    names = data_json[key]
                    rels.extend([disease, name] for name in names)
                    foods += names

            checkitem = data_json['check']
            for check_i in checkitem:
                # The former ``check_i.replace("'", "")`` discarded its result
                # (strings are immutable), so it was a no-op and is dropped.
                if check_i != excluded_check:
                    rels_check.append([disease, check_i])
            checks += checkitem

            # Each entry is split at "(": the text before the first "(" is
            # used as the producer, the text after the last "(" (with ")"
            # removed) as the drug.
            for detail in data_json['drug_detail']:
                parts = detail.split("(")
                producer = parts[0]
                producers.append(producer)
                rels_drug_producer.append([producer, parts[-1].replace(")", "")])

    return (set(diseases), set(symptoms), set(producers), set(departments),
            set(drugs), set(foods), set(checks), disease_info, rels_symptom,
            rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
            rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
            rels_department, rels_category, rels_drug_producer)

4.创建节点

def create_medical_nodes(self):
    """Create the graph nodes from the data returned by ``self.read_data()``.

    As published, only the disease nodes (with their full property set) are
    created; the plain ``create_node`` calls for the other labels are left
    commented out so they can be enabled step by step.

    Returns:
        None.
    """
    print("start create nodes")
    # Read through ``self`` rather than a module-level instance so the method
    # works for whichever object it is called on. Only the first eight
    # entries (node names + disease_info) matter here; the trailing edge
    # lists are ignored.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, *_rels) = self.read_data()

    # Plain disease nodes (superseded by create_disease_node below)
    # self.create_node('Diseases', diseases)
    # Symptom nodes
    # self.create_node('Symptoms', symptoms)
    # Department nodes
    # self.create_node('Departments', departments)
    # Drug nodes
    # self.create_node('Drugs', drugs)
    # Food nodes
    # self.create_node('Foods', foods)
    # Drug producer nodes
    # self.create_node('Producers', producers)
    # Check item nodes
    # self.create_node('Checks', checks)
    self.create_disease_node('Diseases', disease_info)
    return

# Generic node creation; disease nodes are created separately by create_disease_node
def create_node(self, label, values):
    """Create one node per entry of ``values`` under the given label.

    Each node gets a single ``name`` property and is written through
    ``self.neo4j.create``; a progress line is printed per node.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of node names (strings).

    Returns:
        The number of entries processed.
    """
    created = 0
    for created, node_name in enumerate(values, start=1):
        print("".join(("节点: ", label, ", 名称为: ", node_name)))
        self.neo4j.create(Node(label, name=node_name))
    return created

def create_disease_node(self, label, values):
    """Create one node per disease dict, carrying all disease properties.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of dicts; each must contain every key listed in
            ``property_keys`` below (``KeyError`` otherwise).

    Returns:
        The number of nodes created. (The counter was previously never
        incremented, so the method always returned 0.)
    """
    property_keys = ('name', 'desc', 'prevent', 'cause', 'get_prob',
                     'yibao_status', 'easy_get', 'get_way', 'cure_lasttime',
                     'cured_prob', 'cost_money', 'cure_department')
    count = 0
    for disease in values:
        print("节点" + label + ", 名称:" + disease['name'])
        properties = {key: disease[key] for key in property_keys}
        self.neo4j.create(Node(label, **properties))
        # Counted only after the create call returned without raising.
        count += 1
    return count

5.创建关联边

def create_medical_rels(self):
    """Create the graph edges from the data returned by ``self.read_data()``.

    As published, only the disease -> check-item edges are created; the
    other ``create_rel`` calls are left commented out so they can be enabled
    step by step.

    Returns:
        None.
    """
    print("start create rels")
    # Read through ``self`` rather than a module-level instance so the method
    # works for whichever object it is called on. Positions 8..18 of the
    # result are the edge lists; position 19 repeats rels_drug_producer and
    # is not needed.
    (rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
     rels_noteat, rels_doeat, rels_recommendeat, rels_check,
     rels_drug_producer, rels_department, rels_category) = self.read_data()[8:19]

    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> commonly used drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that is fine to eat
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug
    # NOTE(review): the label below is lower-case "drugs" while the node
    # creation step uses 'Drugs' -- confirm before enabling.
    # self.create_rel("Producers", "drugs", rels_drug_producer, "drug_of", "生产药品")

def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create one relationship per ``[start_name, end_name]`` pair.

    For every pair, the nodes ``(:start_node {name: start_name})`` and
    ``(:end_node {name: end_name})`` are matched and connected with a
    ``rel_name`` relationship whose ``name`` property is ``rel_attr``.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of two-element ``[start_name, end_name]`` sequences.
            (The parameter shadows the builtin ``list``; the name is kept
            because it is part of the existing call signature.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # Labels and the relationship type cannot be Cypher parameters, so they
    # are still formatted into the text. The names and the attribute value
    # are passed as query parameters: previously they were pasted between
    # single quotes, so any name containing "'" produced a broken query.
    query = (
        "Match (start:%s), (end:%s) "
        "where start.name=$start_name and end.name=$end_name "
        "create (start)-[rel:%s{name:$rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]

        print("创建边:" + rel_name + ",(" + start_node + "->" + end_node + "),点1:" + s + "点2:" + e)

        self.neo4j.run(query, {"start_name": s, "end_name": e, "rel_attr": rel_attr})

    # The published line had this return fused with the next section heading
    # ("return count6. ..."), which raised NameError once the loop finished.
    return count


# 6. 导出节点数据 (section 6: export node data)
# Export the entity (node) names as word lists.
def export_data(self):
    """Write the node names returned by ``self.read_data()`` to ``dict/*.txt``.

    One UTF-8 file per node category is written, one name per line, without
    a trailing newline. The disease list is not exported (it was disabled in
    the published code).

    Returns:
        None.
    """
    # Function-scope import so the snippet does not depend on a file-level one.
    import os

    # Read through ``self`` rather than a module-level instance; only the
    # seven node-name collections at the front of the result are needed.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     *_rest) = self.read_data()

    exports = (
        # ("diseases", diseases),  # disabled in the published code
        ("symptoms", symptoms),
        ("producers", producers),
        ("departments", departments),
        ("drugs", drugs),
        ("foods", foods),
        ("checks", checks),  # was written twice before; once is enough
    )

    # Previously a missing output directory surfaced as FileNotFoundError.
    os.makedirs("dict", exist_ok=True)
    for file_stem, names in exports:
        # ``with`` guarantees the data is flushed and the handle closed;
        # the files were previously left open.
        with open("dict/%s.txt" % file_stem, encoding="utf-8", mode="w") as out_file:
            # Sorted so repeated exports produce identical files (set
            # iteration order is arbitrary).
            out_file.write("\n".join(sorted(names)))

图片


北桥苏
21 声望0 粉丝