Skip to content

Instantly share code, notes, and snippets.

@dimpurr
Last active October 24, 2017 00:01
Show Gist options
  • Select an option

  • Save dimpurr/d89cd5c6ec1d14b2e5d988c5ebb14635 to your computer and use it in GitHub Desktop.

Select an option

Save dimpurr/d89cd5c6ec1d14b2e5d988c5ebb14635 to your computer and use it in GitHub Desktop.

Revisions

  1. dimpurr revised this gist Oct 24, 2017. No changes.
  2. dimpurr revised this gist Oct 24, 2017. No changes.
  3. dimpurr created this gist Oct 24, 2017.
    79 changes: 79 additions & 0 deletions pyspider-json2csv.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,79 @@
    #!/usr/bin/python
    # -*- coding: utf-8 -*-

    import copy
    import json
    import fileinput

    # tool
    def print_list_utf8(list):
    print "[",
    for i in range(len(list)):
    if (i != len(list) - 1):
    print list[i].encode("utf-8") + ",",
    else:
    print list[i].encode("utf-8"),
    print "]"

    def print_dict_utf8(dict):
    print "{",
    for key in dict:
    print '"' + key.encode("utf-8") + '"' + ": " + '"' + dict[key].encode("utf-8") + '"' + ","
    print "}"

    # env
    fo = open("out.csv", "wb")
    # fo.write( "Test\n");

    # global
    sys_fields = [u'url', u'updatetime', u'taskid'];
    fields = copy.deepcopy(sys_fields)

    # gen fields
    for line in fileinput.input("in.json"):
    json_obj = json.loads(line)
    for field_name in json_obj["result"]:
    if (field_name not in fields):
    fields.append(field_name)
    # print_list_utf8(fields)
    fields.sort()
    print "[FIN] GEN FIELDS"

    # gen empty csv line obj
    csvline = {}
    for i in range(len(fields)):
    csvline[fields[i]] = ""
    print "[FIN] GEN CSV LINE OBJ"

    # write file
    def write_csv_head(list):
        """Write one comma-separated row to the module-level file ``fo``.

        Despite its name this writes data rows as well as the header row:
        ``write_csv_line`` delegates to it.

        The output format is quote-free, so separators inside a value are
        neutralised rather than escaped: commas become ``;`` and line breaks
        become a single space, keeping every record on one physical line.
        Cells are written UTF-8 encoded and the row ends with ``"\\n"``.

        list: sequence of cell values. Strings are written as they are,
            ``None`` becomes an empty cell, and any other value (number,
            bool, list, dict) is written as its JSON text instead of
            failing on the missing ``.encode`` method.

        An empty sequence writes nothing at all.
        """
        cells = []
        for item in list:
            if item is None:
                text = u""
            elif isinstance(item, basestring):
                text = item
            else:
                # Non-string JSON values have no .encode(); serialise them.
                text = json.dumps(item, ensure_ascii=False)
            # "\r\n" goes first so a Windows line break yields one space.
            for line_break in (u"\r\n", u"\n", u"\r"):
                text = text.replace(line_break, u" ")
            cells.append(text.replace(u",", u";").encode("utf-8"))
        if not cells:
            return
        # One write per row instead of one per cell and separator.
        fo.write(",".join(cells) + "\n")

    def write_csv_line(dict):
        """Write the values of *dict* as one CSV row.

        Values are emitted in the order of the module-level ``fields`` list,
        so *dict* must contain every name in ``fields`` (a missing one raises
        KeyError). Keys that are not in ``fields`` are ignored. The actual
        writing is delegated to ``write_csv_head``.
        """
        ordered_values = [dict[column] for column in fields]
        write_csv_head(ordered_values)

    # -- start--
    print "[WRITE] HEAD"
    write_csv_head(fields)
    # gen data lines
    for line in fileinput.input("in.json"):
    new_csvline = copy.deepcopy(csvline)
    json_obj = json.loads(line)
    for field_name in sys_fields: # 遍历 sys field
    new_csvline[field_name] = str(json_obj[field_name])
    # new_csvline[str(field_name).encode("utf-8")] = str(json_obj[field_name]).encode("utf-8")
    for field_name in json_obj["result"]: # 遍历每一个 results 存在的 field
    new_csvline[field_name] = json_obj["result"][field_name]
    # new_csvline[str(field_name).encode("utf-8")] = str(json_obj["result"][field_name]).encode("utf-8")
    # print_dict_utf8(new_csvline)
    # print "[WRITE] LINE"
    write_csv_line(new_csvline)
    print "[FIN ALL]"