从 Python 中的 download_as_string 访问 blob 对象中的数据

我正在 Google Cloud Functions 中尝试访问并修改从 Google Cloud Storage 读取的换行符分隔 JSON（newline-delimited JSON）文件中的数据。得到的结果总是数字，尽管 JSON 中并没有这样的数据。

我看到 blob 对象的 download_as_string() 返回的是字节（ https://googleapis.github.io/google-cloud-python/latest/_modules/google/cloud/storage/blob.html#Blob.download_as_string ），但在我看到的所有参考资料中，每个人都能正常访问他们的数据。

我在 Cloud Functions 中这样做，但我认为我的问题适用于任何 GCP 工具。

我下面的示例应该简单地加载换行 JSON 数据，将其添加到列表中，选择前两个字典条目，转换回换行 JSON 并输出到 GCS 上的 JSON 文件。下面列出了示例、代码和错误输出。

示例换行符 JSON 输入

{"Website": "Google", "URL": "Google.com", "ID": 1}
{"Website": "Bing", "URL": "Bing.com", "ID": 2}
{"Website": "Yahoo", "URL": "Yahoo.com", "ID": 3}
{"Website": "Yandex", "URL": "Yandex.com", "ID": 4}

云函数中的代码

import requests
import json
import csv
from datetime import datetime, timedelta
import sys
from collections import OrderedDict
import os
import random

from google.cloud import bigquery
from google.cloud import storage

def importData(request, execution):
    """Read ``sample_json.json`` from ``sample_bucket`` and write two items back.

    The downloaded data is iterated item by item, the first two items are
    kept, each one is serialised with ``json.dumps``, and the newline-joined
    result is uploaded as ``sample_json_output.json`` in the same bucket.
    ``request`` and ``execution`` are accepted but not used in the body.
    """
    # Read the data from Google Cloud Storage
    read_storage_client = storage.Client()

    # Set buckets and filenames
    bucket_name = "sample_bucket"
    filename = 'sample_json_output.json'

    # get bucket with name
    bucket = read_storage_client.get_bucket('sample_bucket')
    # get bucket data as blob
    blob = bucket.get_blob('sample_json.json')
    # download as string
    json_data = blob.download_as_string()

    # create list
    # NOTE(review): this loop walks json_data itself, not parsed JSON lines.
    # If download_as_string() returns bytes, each y is an int byte value;
    # 123 and 34 are the codes of '{' and '"', the first two characters of
    # the sample input, which matches the reported output.
    website_list = []
    for u,y in enumerate(json_data):
        website_list.append(y)

    # select first two
    website_list = website_list[0:2]

    # Create new-line JSON
    results_ready = '\n'.join(json.dumps(item) for item in website_list)

    # Write the data to Google Cloud Storage
    write_storage_client = storage.Client()

    write_storage_client.get_bucket(bucket_name) \
        .blob(filename) \
        .upload_from_string(results_ready)

sample_json_output.json 文件中的当前输出

123
34

预期输出

{"Website": "Google", "URL": "Google.com", "ID": 1}
{"Website": "Bing", "URL": "Bing.com", "ID": 2}

更新 6/6：如果我直接把 download_as_string 得到的 blob 内容写入文件，那么它会完美地写入 JSON 文件，但我需要事先访问内容。

 import requests
import json
import csv
from datetime import datetime, timedelta
import sys
from collections import OrderedDict
import os
import random

from google.cloud import bigquery
from google.cloud import storage

def importData(request, execution):
    """Copy ``sample_json.json`` to ``sample_json_output.json`` in ``sample_bucket``.

    The source object's contents are downloaded and uploaded again without
    any modification. ``request`` and ``execution`` are accepted but not used.
    """
    # Destination bucket and object name
    bucket_name = "sample_bucket"
    filename = 'sample_json_output.json'

    # Fetch the source object from Google Cloud Storage
    read_storage_client = storage.Client()
    source_bucket = read_storage_client.get_bucket('sample_bucket')
    source_blob = source_bucket.get_blob('sample_json.json')

    # Contents of the source object, passed through untouched
    payload = source_blob.download_as_string()

    # Upload the same contents under the destination name
    write_storage_client = storage.Client()
    target_bucket = write_storage_client.get_bucket(bucket_name)
    target_blob = target_bucket.blob(filename)
    target_blob.upload_from_string(payload)

更新 6/6 输出

{"Website": "Google", "URL": "Google.com", "ID": 1}
{"Website": "Bing", "URL": "Bing.com", "ID": 2}
{"Website": "Yahoo", "URL": "Yahoo.com", "ID": 3}
{"Website": "Yandex", "URL": "Yandex.com", "ID": 4}

原文由 AngryWhopper 发布，翻译遵循 CC BY-SA 4.0 许可协议

阅读 681

import requests
import json
import ndjson
import csv
from datetime import datetime, timedelta
import sys
from collections import OrderedDict
import os
import random

from google.cloud import bigquery
from google.cloud import storage


def importData(request, execution):
    """Write the first two records of a newline-delimited JSON blob to a new blob.

    Downloads ``sample_json.json`` from the bucket, parses it with
    ``ndjson.loads``, keeps the first two records and uploads them as
    newline-delimited JSON to ``sample_json_output.json`` in the same bucket.
    ``request`` and ``execution`` are accepted but not used.
    """
    # Set buckets and filenames
    bucket_name = "bucket-name"
    filename = "sample_json_output.json"

    # Read the data from Google Cloud Storage
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.get_blob("sample_json.json")

    # Parse the download into one Python object per line instead of
    # iterating over the raw downloaded value.
    json_data_string = blob.download_as_string()
    json_data = ndjson.loads(json_data_string)

    # select first two
    first_records = json_data[:2]

    # json.dumps keeps the output valid JSON (double-quoted keys and strings);
    # str() on a dict would emit a Python repr with single quotes instead.
    result = "".join(json.dumps(record) + "\n" for record in first_records)

    # Write the data to Google Cloud Storage
    bucket.blob(filename).upload_from_string(result)

从 Python 中的 download_as_string 访问 blob 对象中的数据

你尚未登录，登录后可以

字节的 trae AI IDE 不支持类似 vscode 的 ssh remote 远程开发怎么办？

DataCap 中验证码无法显示，后台出现 NullPointerException 错误?

发现深拷贝和浅拷贝效果一致：请问一下有什么区别呢？

如何实现一个深拷贝函数？

Python 成员变量在多个子类实例间共享，如何避免？

为什么 Qwen2.5-Omni-7B 官方教程都报错 Cannot import available module of Qwen2_5OmniModel in modelscope ？

Spark-TTS-0.5B 的 requirements.txt 在哪里？

Stack Overflow 翻译