平铺嵌套JSON,Go竟然比Python慢,问题出在哪里?

分别用Go和Python平铺嵌套JSON,结果发现Go竟然比Python还要慢,这对于Go新手来说简直是刷新认知。请问Go更慢的原因是什么,如何改进?
此文无意引战,所以没有选择Python标签,请巨佬告诉我如何改进Go代码,我相信Go是更快且应该是快很多的。

数据样本(结构类似,但不固定)

{
    "first": "Dale",
    "last": "Murphy",
    "age": 44,
    "nets": ["ig", "fb", "value1", "value2"],
    "submap": {
        "subfirst": "Dale",
        "sublast": ["Murphy", "value3", "value4"],
        "subsub":{"subsub":["value5","value6","value7","value8","value9"]}
    }
}

预期结果

[
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"Murphy","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"Murphy","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"Murphy","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"Murphy","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value3","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value3","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value3","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value3","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value4","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value4","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value4","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value4","subsub":"value5"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"Murphy","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"Murphy","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"Murphy","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"Murphy","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value3","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value3","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value3","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value3","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value4","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value4","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value4","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value4","subsub":"value6"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"Murphy","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"Murphy","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"Murphy","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"Murphy","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value3","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value3","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value3","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value3","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value4","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value4","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value4","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value4","subsub":"value7"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"Murphy","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"Murphy","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"Murphy","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"Murphy","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value3","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value3","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value3","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value3","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value4","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value4","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value4","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value4","subsub":"value8"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"Murphy","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"Murphy","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"Murphy","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"Murphy","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value3","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value3","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value3","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value3","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"ig","subfirst":"Dale","sublast":"value4","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"fb","subfirst":"Dale","sublast":"value4","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value1","subfirst":"Dale","sublast":"value4","subsub":"value9"},
{"first":"Dale","last":"Murphy","age":"44","nets":"value2","subfirst":"Dale","sublast":"value4","subsub":"value9"}
]

ChatGPT给的Go代码

package main

import (
    "encoding/json"
    "fmt"
    "time"
)

// flattenJson decodes b as a JSON object and expands it into flat rows by
// handing the decoded map to flatten together with an empty set of base
// values.
//
// When b cannot be decoded into a map[string]interface{}, the error from
// json.Unmarshal is returned unchanged and the row slice is nil.
func flattenJson(b []byte) ([]map[string]interface{}, error) {
	var decoded map[string]interface{}
	if err := json.Unmarshal(b, &decoded); err != nil {
		return nil, err
	}

	base := make(map[string]interface{})
	return flatten(decoded, base), nil
}

// flatten expands a decoded JSON object into flat rows, one row per
// combination of array elements (a Cartesian product over every array found
// in jsonObj and in its nested objects).
//
//   - A nested object is flattened recursively and its rows are combined with
//     the rows built so far; nested keys overwrite equal keys already present.
//   - An array contributes one copy of every row per element, stored under the
//     array's key. Elements are stored as-is; they are not flattened further.
//   - Any other value is written into every row under its key.
//
// result supplies base key/value pairs that are copied into every row; keys
// coming from jsonObj always take precedence over it. result is never
// modified and may be nil.
//
// The returned slice is never nil. It is empty when any array encountered is
// empty. Rows are built while ranging over maps, so their order is not stable
// between calls.
func flatten(jsonObj map[string]interface{}, result map[string]interface{}) []map[string]interface{} {
	// Seed the single starting row with the base values. Everything written
	// later overwrites them, which gives jsonObj precedence over result
	// without a separate merge pass (and a second copy of every row) at the
	// end.
	rows := []map[string]interface{}{cloneRow(result, len(jsonObj))}

	for k, v := range jsonObj {
		switch value := v.(type) {
		case map[string]interface{}:
			// Flatten the nested object without the base values: they are
			// already present in every row, and passing them down would let
			// them overwrite sibling keys depending on map iteration order.
			nested := flatten(value, nil)
			next := make([]map[string]interface{}, 0, len(nested)*len(rows))
			for _, sub := range nested {
				for _, row := range rows {
					merged := cloneRow(row, len(sub))
					for key, val := range sub {
						merged[key] = val
					}
					next = append(next, merged)
				}
			}
			rows = next
		case []interface{}:
			next := make([]map[string]interface{}, 0, len(value)*len(rows))
			for _, elem := range value {
				for _, row := range rows {
					expanded := cloneRow(row, 1)
					expanded[k] = elem
					next = append(next, expanded)
				}
			}
			rows = next
		default:
			// Rows are owned by this call, so they can be updated in place.
			for _, row := range rows {
				row[k] = value
			}
		}
	}

	return rows
}

// cloneRow returns a copy of src sized to hold extra additional entries.
func cloneRow(src map[string]interface{}, extra int) map[string]interface{} {
	dst := make(map[string]interface{}, len(src)+extra)
	for key, val := range src {
		dst[key] = val
	}
	return dst
}

// main benchmarks flattenJson: it decodes and flattens the same sample
// document 100000 times and prints the elapsed wall-clock time. The timed
// region includes JSON decoding, because flattenJson parses on every call.
func main() {
	start := time.Now()
	jsonData := []byte(`{
        "first": "Dale",
        "last": "Murphy",
        "age": 44,
        "nets": ["ig", "fb", "value1", "value2"],
        "submap": {
            "subfirst": "Dale",
            "sublast": ["Murphy", "value3", "value4"],
            "subsub":{"subsub":["value5","value6","value7","value8","value9"]}
        }
}`)
	for i := 0; i < 100000; i++ {
		if _, err := flattenJson(jsonData); err != nil {
			// A failing call would make the measured time meaningless, so
			// report it and stop instead of timing the error path.
			fmt.Println("flattenJson:", err)
			return
		}
	}
	fmt.Println(time.Since(start))
}
// (base) C:\abc>go run stsd.go 
// 4.287609s
// (base) C:\abc>go run stsd.go
// 4.513107s
// (base) C:\abc>go run stsd.go
// 4.1299503s
// (base) C:\abc>go version
// go version go1.21.4 windows/amd64

Python实现代码

import time,json

def flattenjson(json_obj, normalresult=None):
    """Expand a decoded JSON object into a list of flat dicts.

    Every list value multiplies the rows built so far (one copy per element,
    stored under the list's key); every nested dict is flattened recursively
    and merged into each row; any other value is written into every row.

    Args:
        json_obj: dict to flatten.
        normalresult: optional dict merged into every row at the end; its
            entries override keys taken from json_obj. Defaults to empty.

    Returns:
        A list of dicts, one per combination of list elements. The list is
        empty when any list value encountered is empty.
    """
    base = {} if normalresult is None else normalresult

    # Rows accumulated so far; start with a single empty row.
    rows = [{}]
    for key, value in json_obj.items():
        if isinstance(value, dict):
            # Combine every nested row with every row built so far.
            nested_rows = flattenjson(value)
            combined = []
            for nested in nested_rows:
                for row in rows:
                    combined.append(dict(row, **nested))
            rows = combined
        elif isinstance(value, list):
            # One copy of each row per list element.
            expanded = []
            for element in value:
                for row in rows:
                    expanded.append(dict(row, **{key: element}))
            rows = expanded
        else:
            # Plain value: write it into every existing row in place.
            for row in rows:
                row[key] = value

    # Overlay the base values on each row.
    merged = []
    for row in rows:
        merged.append(dict(row, **base))
    return merged

if __name__ == '__main__':
    # Benchmark: parse and flatten the same sample document 100000 times,
    # then print the elapsed wall-clock time in seconds.
    started = time.time()
    sample = """{
            "first": "Dale",
            "last": "Murphy",
            "age": 44,
            "nets": ["ig", "fb", "value1", "value2"],
            "submap": {
                "subfirst": "Dale",
                "sublast": ["Murphy", "value3", "value4"],
                "subsub":{"subsub":["value5","value6","value7","value8","value9"]}
            }
    }"""
    rounds = 100000
    for _ in range(rounds):
        parsed = json.loads(sample)
        flattenjson(parsed)
    elapsed = time.time() - started
    print(elapsed)
# (base) C:\abc>python test.py
# 3.2182690620422363
# (base) C:\abc>python test.py
# 3.291424560546875
# (base) C:\abc>python test.py
# 3.239586067199707
# (base) C:\abc>python
# Python 3.10.9 | packaged by Anaconda, Inc. | (main, Mar  1 2023, 18:18:15) [MSC v.1916 64 bit (AMD64)] on win32
# Type "help", "copyright", "credits" or "license" for more information.
# >>>

关于如何改进Go,AI已经无法解答了

You:
同样的 json 数据,我测试 100000 次平铺,为什么 python 的速度比 go 还要快?

AI:
Python 比 Go 更快的情况在特定场景下是可能的,尽管 Go 通常在性能上优于 Python,特别是在并发和系统级编程方面。以下是一些可能导致这种情况的原因:

实现细节:
    Python 和 Go 代码的实现方式可能有所不同,这可能导致性能差异。即使逻辑相同,语言特性和内部优化也会影响性能。
    Python 的一些内置函数(如 itertools.product)可能经过高度优化,而 Go 代码可能没有使用同等优化的库或方法。

数据结构和算法:
    使用的数据结构和算法可能在两种语言中有细微差异,这可能会影响性能。

解释器 / 编译器优化:
    Python 解释器(如 CPython)和 Go 编译器采用不同的优化策略,这些策略可能在特定情况下影响性能。

垃圾收集:
    Go 语言有一个运行时垃圾收集器,可能会影响性能。Python 也有垃圾收集,但其行为可能与 Go 不同。

启动时间:
    对于非常短的运行时间,Python 程序的启动时间可能相对较短,而编译型语言(如 Go)的程序启动时间可能较长。

测试环境:
    测试的硬件和操作系统环境可能对性能有影响。
    测试方法(如是否预热、是否包括编译时间等)也会影响结果。

为了更准确地比较性能,建议:

确保测试在相似的环境和条件下进行。
仔细审查和优化 Go 代码,确保它是高效的。
进行多次测试并取平均值,以减少偶然性的影响。
如果可能,使用性能分析工具查看代码的热点和潜在的性能瓶颈。

性能测试是复杂的,结果可能因多种因素而异,确保测试的公平和准确性非常重要。
You:
是不是大量接口导致性能下降?
AI:
在 Go 语言中,使用接口(interface)可能会对性能产生一定影响,但这种影响通常是非常微小的。接口在 Go 中是一种强大的特性,允许实现多态和灵活的代码设计。但是,它们确实带来了一些额外的开销,尤其是在涉及大量数据处理和高频调用时。以下是一些使用接口可能影响性能的情况:

动态分派:当您通过接口调用方法时,Go 需要在运行时确定要调用的具体方法实现。这个查找过程称为动态分派,它稍微慢于静态类型的直接方法调用。

类型断言和类型检查:如果您经常需要对接口类型进行类型断言以访问具体的类型,这可能会导致性能开销。

额外的间接层:接口引入了一个额外的间接层,因为它们通过指针引用具体的值。这意味着额外的内存解引用操作,可能会稍微影响性能。

内存分配:在某些情况下,使用接口可能导致额外的堆内存分配,特别是当它们用于包装小的值类型时。

垃圾回收:由于接口可能指向堆上分配的对象,因此可能会增加垃圾收集器的工作量。

在大多数情况下,这些开销都非常小,不太可能成为性能瓶颈。但是,在一些极端情况下(如大量数据处理或高频率调用),这些开销可能变得显著。

如果您怀疑接口是性能问题的原因,可以通过以下方式进行优化:

性能分析:使用 Go 的 pprof 工具进行性能分析,找出性能瓶颈的确切位置。
减少接口使用:在性能关键的部分考虑使用具体类型替代接口。
避免不必要的类型断言:减少类型断言的使用,尤其是在循环或频繁调用的代码块中。

总的来说,除非您正在处理大量数据或需要极高的性能,否则接口带来的开销通常是可以接受的,并且它们为代码的灵活性和可维护性带来了巨大的好处。
You:
GO 的实现有没有更高效的方法?
AI:
要提高 Go 语言中特定代码的性能,尤其是涉及 JSON 处理和数据转换的代码,您可以采取以下几种策略:

优化数据结构:
    使用更高效的数据结构可以显著提高性能。例如,使用切片而非映射,或选择更适合特定场景的数据结构。

减少内存分配:
    避免不必要的内存分配,如重复创建对象或频繁的切片扩展。您可以预先分配足够大的空间或复用已有的对象。

并行处理:
    如果您的任务可以并行化,使用 Go 的并发特性(goroutines 和 channels)来分散工作可以显著提高效率。

优化循环和递归:
    优化循环逻辑,减少循环内部的计算和函数调用。在递归函数中,确保没有不必要的重复计算。

使用更高效的库:
    选择性能更好的第三方库进行 JSON 解析和处理。比如,json-iterator/go 通常比标准库中的 encoding/json 提供更好的性能。

减少接口使用:
    如果可能,减少接口的使用,特别是在性能关键的代码路径中。使用具体类型可以避免接口带来的间接性和动态分派开销。

You:
用 json-iterator/go 改进之前的代码
AI:
要使用 json-iterator/go 库来改进之前的代码,首先需要安装这个库。然后,您可以将标准的 encoding/json 解析替换为 json-iterator/go 的解析,以提高 JSON 处理的效率。json-iterator/go 是一个高性能的 JSON 解析器,它完全兼容标准库,但通常提供更快的解析速度。
.....
结果速度和使用标准库encoding/json没有明显变化。

阅读 1.2k
2 个回答

粗略地看了一下代码,其中使用了反射。在Go中,反射非常消耗性能,相对而言会比较慢。

第一个原因是用于反射的reflect.Type信息需要动态生成。当我们使用Golang反射机制时,编译器需要动态生成一些辅助结构体来保存调用时的上下文信息。这些结构体中的字段数量与复杂度都取决于反射的使用情况。因此,如果我们在编写代码时经常使用反射,那么编译器将需要频繁地动态生成这些结构体,这会导致编译时间的增加和程序执行速度的降低。

第二个原因是反射使用了接口。在Golang中,反射以接口值(interface{})为入口:任何类型的值(包括基础类型和结构体)在被反射之前,都需要先转换为接口类型。这种转换需要额外的时间和空间开销,而且机器码也需要额外的指令来完成类型转换。

在Go中通常会尽量避免用反射来实现功能;当然,如果开发效率上的收益足够大,用反射也没问题。

package main

import (
    "encoding/json"
    "fmt"
)

// Decode targets for the sample document. The json tags bind every field to
// the lower-case key of the same name.
type (
	// MyStruct mirrors the top-level JSON object.
	MyStruct struct {
		First  string   `json:"first"`
		Last   string   `json:"last"`
		Age    int      `json:"age"`
		Nets   []string `json:"nets"`
		Submap Submap   `json:"submap"`
	}

	// Submap mirrors the object stored under the "submap" key.
	Submap struct {
		Subfirst string   `json:"subfirst"`
		Sublast  []string `json:"sublast"`
		Subsub   Subsub   `json:"subsub"`
	}

	// Subsub mirrors the object stored under the "subsub" key of Submap; it
	// holds a single list that is itself keyed "subsub".
	Subsub struct {
		Subsub []string `json:"subsub"`
	}
)

// main decodes the sample document into a MyStruct and prints every decoded
// field on its own line, prefixed with a label. If decoding fails it prints
// the error and returns without printing any field.
func main() {
	jsonData := `
    {
        "first": "Dale",
        "last": "Murphy",
        "age": 44,
        "nets": ["ig", "fb", "value1", "value2"],
        "submap": {
            "subfirst": "Dale",
            "sublast": ["Murphy", "value3", "value4"],
            "subsub":{"subsub":["value5","value6","value7","value8","value9"]}
        }
    }`

	var data MyStruct
	if err := json.Unmarshal([]byte(jsonData), &data); err != nil {
		fmt.Println("Error:", err)
		return
	}

	// Label/value pairs in output order; nested fields are reached through
	// data.Submap.
	fields := []struct {
		label string
		value interface{}
	}{
		{"First:", data.First},
		{"Last:", data.Last},
		{"Age:", data.Age},
		{"Nets:", data.Nets},
		{"Subfirst:", data.Submap.Subfirst},
		{"Sublast:", data.Submap.Sublast},
		{"Subsub:", data.Submap.Subsub.Subsub},
	}
	for _, f := range fields {
		fmt.Println(f.label, f.value)
	}
}
新手上路,请多包涵
// dataStruct is the decode target for the sample document. It carries no json
// tags, so decoding relies on encoding/json matching object keys to the
// exported field names case-insensitively.
type dataStruct struct {
	First  string
	Last   string
	Age    int
	Nets   []string
	Submap struct {
		Subfirst string
		Sublast  []string
		Subsub   struct {
			Subsub []string
		}
	}
}

// flattenJson decodes data into a dataStruct and returns one flat row per
// combination of a Nets, a Submap.Sublast and a Submap.Subsub.Subsub element.
// Every row holds the keys "first", "last", "age", "subfirst", "nets",
// "sublast" and "subsub"; age is rendered as a decimal string.
//
// Rows are ordered with nets varying slowest and subsub varying fastest. The
// result is non-nil and empty when any of the three lists is empty.
//
// A decode failure is returned wrapped (inspect it with errors.Is or
// errors.As) and the row slice is nil. The error is not printed here, so the
// caller decides how to report it.
func flattenJson(data []byte) ([]map[string]string, error) {
	var jd dataStruct
	if err := json.Unmarshal(data, &jd); err != nil {
		return nil, fmt.Errorf("解析json失败: %w", err)
	}

	nets := jd.Nets
	sublasts := jd.Submap.Sublast
	subsubs := jd.Submap.Subsub.Subsub

	// The conversion does not depend on the loop variables, so do it once.
	age := strconv.Itoa(jd.Age)

	// The row count is known up front; allocate the slice a single time.
	ans := make([]map[string]string, 0, len(nets)*len(sublasts)*len(subsubs))
	for _, net := range nets {
		for _, sublast := range sublasts {
			for _, subsub := range subsubs {
				ans = append(ans, map[string]string{
					"first":    jd.First,
					"last":     jd.Last,
					"age":      age,
					"subfirst": jd.Submap.Subfirst,
					"nets":     net,
					"sublast":  sublast,
					"subsub":   subsub,
				})
			}
		}
	}
	return ans, nil
}

用反射确实比较慢;如果事先知道数据结构,可以试试这种写法。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题
宣传栏