使用python将json转换为csv

问题描述:

我有一些非常大的 JSON 文件需要转换(最大的有 500 MB)。在线工具只支持小文件,不适合我的情况。所以我在 Stack Overflow 上寻找可能适合我的脚本,并找到了这一个:使用 Python 将 JSON 转换为 CSV

import csv 
import json 

infile = open("test_user.json","r")   # source JSON opened as text; never closed explicitly in this snippet
outfile = open ("test_user.csv","w")  # destination CSV opened for writing; never closed explicitly either
# Wrap the output file so that each writerow() call emits one CSV record.
writer = csv.writer(outfile) 
# Every physical line of the input file is parsed as its own JSON document.
for row in infile: 
    data = json.loads(row)   # NOTE(review): assumes each line holds one complete JSON value - confirm against the real input
    writer.writerow(data)    # writerow() expects an iterable of field values - TODO confirm what json.loads returns per line

这是我的其中一个 JSON 文件的链接:https://pastebin.com/xpzeJmug

[ 
    { 
     "contributors": null, 
     "truncated": false, 
     "text": "", 
     "in_reply_to_status_id": null, 
     "id": 433266666479562750, 
     "favorite_count": 0, 
     "source": "<a href=\"http://www.apple.com\" rel=\"nofollow\">iOS</a>", 
     "retweeted": false, 
     "coordinates": null, 
     "entities": { 
      "symbols": [], 
      "user_mentions": [], 
      "hashtags": [ 
       { 
        "indices": [ 
         38, 
         43 
        ], 
        "text": "IMDb" 
       } 
      ], 
      "urls": [ 
       { 
        "url": "http://tw.co/LTiHcO3XH2", 
        "indices": [ 
         44, 
         66 
        ], 
        "expanded_url": "http://www.imdb.com/title/tt0993846", 
        "display_url": "imdb.com/title/tt0993846" 
       } 
      ] 
     }, 
     "in_reply_to_screen_name": null, 
     "in_reply_to_user_id": null, 
     "retweet_count": 0, 
     "id_str": "433266666479562753", 
     "favorited": false, 
     "user": { 
      "follow_request_sent": false, 
      "profile_use_background_image": true, 
      "id": 182591357, 
      "verified": false, 
      "profile_text_color": "333333", 
      "profile_image_url_https": "https://pbs.twimg.com/profile_images/430409335710416897/pwV3muOK_normal.jpeg", 
      "profile_sidebar_fill_color": "DDEEF6", 
      "is_translator": false, 
      "geo_enabled": true, 
      "entities": { 
       "description": { 
        "urls": [] 
       } 
      }, 
      "followers_count": 363, 
      "protected": false, 
      "location": "Turkey", 
      "default_profile_image": false, 
      "id_str": "182591357", 
      "lang": "tr", 
      "utc_offset": 7200, 
      "statuses_count": 23921, 
      "description": "@Besiktas, I Love House Music.", 
      "friends_count": 238, 
      "profile_link_color": "0084B4", 
      "profile_image_url": "http://pbs.twimg.com/profile_images/430409335710416897/pwV3muOK_normal.jpeg", 
      "notifications": false, 
      "profile_background_image_url_https": "https://pbs.twimg.com/profile_background_images/660819655/mbxstu4fqnkll8b5kyu7.jpeg", 
      "profile_background_color": "C0DEED", 
      "profile_banner_url": "https://pbs.twimg.com/profile_banners/182591357/1391607536", 
      "profile_background_image_url": "http://pbs.twimg.com/profile_background_images/660819655/mbxstu4fqnkll8b5kyu7.jpeg", 
      "name": "Metin Haşal", 
      "is_translation_enabled": false, 
      "profile_background_tile": true, 
      "favourites_count": 3, 
      "screen_name": "metinhasal", 
      "url": null, 
      "created_at": "Tue Aug 24 23:30:11 +0000 2010", 
      "contributors_enabled": false, 
      "time_zone": "Istanbul", 
      "profile_sidebar_border_color": "FFFFFF", 
      "default_profile": false, 
      "following": false, 
      "listed_count": 0 
     }, 
     "geo": null, 
     "in_reply_to_user_id_str": null, 
     "possibly_sensitive": false, 
     "lang": "en", 
     "created_at": "Tue Feb 11 15:49:57 +0000 2014", 
     "in_reply_to_status_id_str": null, 
     "place": null, 
     "metadata": { 
      "iso_language_code": "en", 
      "result_type": "recent" 
     } 
    }, 
    { 
     "contributors": null, 
     "truncated": false, 
     "text": "", 
     "in_reply_to_status_id": null, 
     "id": 433266923489730560, 
     "favorite_count": 0, 
     "source": "<a href=\"http://www.apple.com\" rel=\"nofollow\">iOS</a>", 
     "retweeted": false, 
     "coordinates": null, 
     "entities": { 
      "symbols": [], 
      "user_mentions": [], 
      "hashtags": [ 
       { 
        "indices": [ 
         53, 
         58 
        ], 
        "text": "IMDb" 
       } 
      ], 
      "urls": [ 
       { 
        "url": "http://tw.co/IAWzT6Mmp1", 
        "indices": [ 
         30, 
         52 
        ], 
        "expanded_url": "http://www.imdb.com/title/tt0387877", 
        "display_url": "imdb.com/title/tt0387877" 
       } 
      ] 
     }, 
     "in_reply_to_screen_name": null, 
     "in_reply_to_user_id": null, 
     "retweet_count": 0, 
     "id_str": "433266923489730560", 
     "favorited": false, 
     "user": { 
      "follow_request_sent": false, 
      "profile_use_background_image": true, 
      "id": 249246669, 
      "verified": false, 
      "profile_text_color": "333333", 
      "profile_image_url_https": "https://pbs.twimg.com/profile_images/2551826686/image_normal.jpg", 
      "profile_sidebar_fill_color": "DDEEF6", 
      "is_translator": false, 
      "geo_enabled": true, 
      "entities": { 
       "url": { 
        "urls": [ 
         { 
          "url": "http://tw.co/VhnKdhkDbZ", 
          "indices": [ 
           0, 
           22 
          ], 
          "expanded_url": "http://www.vimeo.com/bolelof", 
          "display_url": "vimeo.com/bolelof" 
         } 
        ] 
       }, 
       "description": { 
        "urls": [] 
       } 
      }, 
      "followers_count": 787, 
      "protected": false, 
      "location": "Moscow City Mufuka!", 
      "default_profile_image": false, 
      "id_str": "249246669", 
      "lang": "en", 
      "utc_offset": -18000, 
      "statuses_count": 1969, 
      "description": "Заядлый турист, маньяк-велосипедист, любитель тату-портаков и всей музыки на свете. Ведущий 'афиши' на мск-24. Хороший парень и возможно ваш друг.", 
      "friends_count": 108, 
      "profile_link_color": "0084B4", 
      "profile_image_url": "http://pbs.twimg.com/profile_images/2551826686/image_normal.jpg", 
      "notifications": false, 
      "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 
      "profile_background_color": "C0DEED", 
      "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", 
      "name": "болелов борис", 
      "is_translation_enabled": false, 
      "profile_background_tile": false, 
      "favourites_count": 0, 
      "screen_name": "bolelof_boris", 
      "url": "http://tw.co/VhnKdhkDbZ", 
      "created_at": "Tue Feb 08 17:26:01 +0000 2011", 
      "contributors_enabled": false, 
      "time_zone": "Quito", 
      "profile_sidebar_border_color": "C0DEED", 
      "default_profile": true, 
      "following": false, 
      "listed_count": 27 
     }, 
     "geo": null, 
     "in_reply_to_user_id_str": null, 
     "possibly_sensitive": false, 
     "lang": "en", 
     "created_at": "Tue Feb 11 15:50:58 +0000 2014", 
     "in_reply_to_status_id_str": null, 
     "place": null, 
     "metadata": { 
      "iso_language_code": "en", 
      "result_type": "recent" 
     } 
    }, 
    { 
     "contributors": null, 
     "truncated": false, 
     "text": "", 
     "in_reply_to_status_id": null, 
     "id": 433267603868762100, 
     "favorite_count": 0, 
     "source": "<a href=\"http://www.apple.com\" rel=\"nofollow\">iOS</a>", 
     "retweeted": false, 
     "coordinates": null, 
     "entities": { 
      "symbols": [], 
      "user_mentions": [], 
      "hashtags": [ 
       { 
        "indices": [ 
         45, 
         50 
        ], 
        "text": "IMDb" 
       } 
      ], 
      "urls": [ 
       { 
        "url": "http://tw.co/Mv56yMkFQ2", 
        "indices": [ 
         22, 
         44 
        ], 
        "expanded_url": "http://www.imdb.com/title/tt2193215", 
        "display_url": "imdb.com/title/tt2193215" 
       } 
      ] 
     }, 
     "in_reply_to_screen_name": null, 
     "in_reply_to_user_id": null, 
     "retweet_count": 0, 
     "id_str": "433267603868762112", 
     "favorited": false, 
     "user": { 
      "follow_request_sent": false, 
      "profile_use_background_image": true, 
      "id": 249246669, 
      "verified": false, 
      "profile_text_color": "333333", 
      "profile_image_url_https": "https://pbs.twimg.com/profile_images/2551826686/image_normal.jpg", 
      "profile_sidebar_fill_color": "DDEEF6", 
      "is_translator": false, 
      "geo_enabled": true, 
      "entities": { 
       "url": { 
        "urls": [ 
         { 
          "url": "http://tw.co/VhnKdhkDbZ", 
          "indices": [ 
           0, 
           22 
          ], 
          "expanded_url": "http://www.vimeo.com/bolelof", 
          "display_url": "vimeo.com/bolelof" 
         } 
        ] 
       }, 
       "description": { 
        "urls": [] 
       } 
      }, 
      "followers_count": 787, 
      "protected": false, 
      "location": "Moscow City Mufuka!", 
      "default_profile_image": false, 
      "id_str": "249246669", 
      "lang": "en", 
      "utc_offset": -18000, 
      "statuses_count": 1969, 
      "description": "Заядлый турист, маньяк-велосипедист, любитель тату-портаков и всей музыки на свете. Ведущий 'афиши' на мск-24. Хороший парень и возможно ваш друг.", 
      "friends_count": 108, 
      "profile_link_color": "0084B4", 
      "profile_image_url": "http://pbs.twimg.com/profile_images/2551826686/image_normal.jpg", 
      "notifications": false, 
      "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 
      "profile_background_color": "C0DEED", 
      "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", 
      "name": "болелов борис", 
      "is_translation_enabled": false, 
      "profile_background_tile": false, 
      "favourites_count": 0, 
      "screen_name": "bolelof_boris", 
      "url": "http://tw.co/VhnKdhkDbZ", 
      "created_at": "Tue Feb 08 17:26:01 +0000 2011", 
      "contributors_enabled": false, 
      "time_zone": "Quito", 
      "profile_sidebar_border_color": "C0DEED", 
      "default_profile": true, 
      "following": false, 
      "listed_count": 27 
     }, 
     "geo": null, 
     "in_reply_to_user_id_str": null, 
     "possibly_sensitive": false, 
     "lang": "bg", 
     "created_at": "Tue Feb 11 15:53:41 +0000 2014", 
     "in_reply_to_status_id_str": null, 
     "place": null, 
     "metadata": { 
      "iso_language_code": "bg", 
      "result_type": "recent" 
     } 
    } 
] 

当我运行该文件,我收到以下错误:

Traceback (most recent call last):

File "C:/Users/Piyush/Desktop/2008/runcsv.py", line 11, in <module> 
writer.writerow(data) 

_csv.Error: sequence expected 

有人可以告诉我该如何解决吗?我不太了解 Python,所以如果你能给我一个详细的答案,我将不胜感激。

+0

为什么不使用 pandas? – selten98

+0

@selten98 你能向我解释一下如何使用它吗? – Hossein

+0

`import pandas as pd; df = pd.DataFrame([]); df = df.from_json(JSONHERE); df.to_csv()` 大概就像这样。 – selten98

这里已经有人问过类似的问题:_csv.Error: sequence expected [Python]

请检查那里的解决方案是否适用于你。

+1

在提出这个问题之前,我已经查看了所有类似的问题,但它们对我都不起作用... – Hossein

import pandas as pd 
# Parse the whole JSON file into a DataFrame in one call.
df = pd.read_json(path_or_buf="FILE.json")
# Serialise that DataFrame to a CSV file on disk.
df.to_csv(path_or_buf="test.csv")

此代码可能适合您。

+0

这段代码非常简单,但对我不起作用!我的 JSON 结构很复杂...... – Hossein

您可能会在这里找到解决方案:Stack Overflow: Nested Json to csv

如果您在问题中指定了所需的完整格式,将会更有帮助。