Python VS JSON, YAML, CSV & XML

Python VS JSON, YAML, CSV & XML


jmtaboada
jmtaboada
Python VS JSON, YAML, CSV & XML

Examples of how to work with JSON, YAML, CSV, and XML files in Python.

Today I found myself preparing some exercises for a student of mine who is learning the Python programming language; last week he asked me about converting a CSV file to XML. So I decided to publish these exercises for anybody who is unsure how to work with these formats.

Example of JSON processing

import json

# Sample data: a shop and the number of units sold by each worker.
example = dict(
    name="Kiroga",
    workers=[
        dict(name="Alex", sold=3),
        dict(name="Susan", sold=8),
        dict(name="Delia", sold=4),
    ],
)

# Serialize the Python dictionary into a JSON-formatted string.
print("=== From Python to JSON String ===")
json_string = json.dumps(example)
print(type(json_string), "->", json_string)
print()

# Parse the JSON string back into plain Python objects.
print("=== From JSON String to Python ===")
obj = json.loads(json_string)
print(type(obj), "->", obj)

Result:

=== From Python to JSON String ===
<class 'str'> -> {"name": "Kiroga", "workers": [{"name": "Alex", "sold": 3}, {"name": "Susan", "sold": 8}, {"name": "Delia", "sold": 4}]}

=== From JSON String to Python ===
<class 'dict'> -> {'name': 'Kiroga', 'workers': [{'name': 'Alex', 'sold': 3}, {'name': 'Susan', 'sold': 8}, {'name': 'Delia', 'sold': 4}]}

Example of YAML processing

import yaml

# Sample data mixing integers, floats, nested mappings and non-ASCII text.
incoming_data = [1024, 88, u'á€eVentura', 192834, 1337, 'no data']
names = {'incoming': 'málaga', 'outgoing': 'szczecin', 'days': 4}
example = {
    'incoming data': incoming_data,
    'configuration': '/etc/example/config.conf',
    'aprox pi value': 3.141592654,
    'names': names,
}

# Serialize the dictionary into a YAML document held in a string.
# NOTE(review): with these default options the published output shows the
# keys sorted and the non-ASCII characters escaped.
print("=== Dictionary to YAML string ===")
yaml_string = yaml.dump(example)
print(yaml_string)
print()

# Parse the YAML text back; safe_load only builds plain Python objects.
print("=== YAML string to Dictionary ===")
obj = yaml.safe_load(yaml_string)
print(obj)

Result:

=== Dictionary to YAML string ===
aprox pi value: 3.141592654
configuration: /etc/example/config.conf
incoming data:
- 1024
- 88
- "\xE1\u20ACeVentura"
- 192834
- 1337
- no data
names:
  days: 4
  incoming: "m\xE1laga"
  outgoing: szczecin


=== YAML string to Dictionary ===
{'aprox pi value': 3.141592654, 'configuration': '/etc/example/config.conf', 'incoming data': [1024, 88, 'á€eVentura', 192834, 1337, 'no data'], 'names': {'days': 4, 'incoming': 'málaga', 'outgoing': 'szczecin'}}

Example of CSV processing

import csv
from io import StringIO

# Sample CSV text: IP ranges with their country code and country name.
example = '''"Origin","Target","Code","Country"
"1.0.0.0","1.0.0.255","AU","Australia"
"1.0.1.0","1.0.3.255","CN","China"
"1.0.4.0","1.0.7.255","AU","Australia"
"1.0.128.0","1.0.255.255","TH","Thailand"'''

# One output line per record, shared by both reading examples.
line_format = "Origin:{:>12s}     Target:{:>12s}      Code:{:2s}     Country:{}"

# csv.reader yields every record as a list of strings.
print("=== Read as ROW ===")
rows = csv.reader(example.split("\n"), delimiter=',', quotechar='"')
headers = next(rows)  # consume the header line so only data rows remain
for origin, target, code, country in rows:
    print(line_format.format(origin, target, code, country))
print("")

# csv.DictReader yields every record as a mapping keyed by the header names.
print("=== Read as Dict ===")
rows = csv.DictReader(example.split("\n"), delimiter=',', quotechar='"')
for record in rows:
    print(line_format.format(*(record[column] for column in headers)))
print("")

# csv.DictWriter writes mappings to any file-like object; here an in-memory
# text buffer is used so the result can be printed.
print("=== Write CSV ===")
buffer = StringIO()
header = ['Origin', 'Target', 'Code', 'Country']
writer = csv.DictWriter(buffer, fieldnames=header)
writer.writeheader()
entries = (
    ('1.0.0.0', '1.0.0.255', 'AU', 'Australia'),
    ('1.0.1.0', '1.0.3.255', 'CN', 'China'),
    ('1.0.4.0', '1.0.7.255', 'AU', 'Australia'),
    ('1.0.128.0', '1.0.255.255', 'TH', 'Thailand'),
)
writer.writerows(dict(zip(header, entry)) for entry in entries)
print(buffer.getvalue())

Result:

=== Read as ROW ===
Origin:     1.0.0.0     Target:   1.0.0.255      Code:AU     Country:Australia
Origin:     1.0.1.0     Target:   1.0.3.255      Code:CN     Country:China
Origin:     1.0.4.0     Target:   1.0.7.255      Code:AU     Country:Australia
Origin:   1.0.128.0     Target: 1.0.255.255      Code:TH     Country:Thailand

=== Read as Dict ===
Origin:     1.0.0.0     Target:   1.0.0.255      Code:AU     Country:Australia
Origin:     1.0.1.0     Target:   1.0.3.255      Code:CN     Country:China
Origin:     1.0.4.0     Target:   1.0.7.255      Code:AU     Country:Australia
Origin:   1.0.128.0     Target: 1.0.255.255      Code:TH     Country:Thailand

=== Write CSV ===
Origin,Target,Code,Country
1.0.0.0,1.0.0.255,AU,Australia
1.0.1.0,1.0.3.255,CN,China
1.0.4.0,1.0.7.255,AU,Australia
1.0.128.0,1.0.255.255,TH,Thailand

Example of XML processing with Minidom

To me, the resulting source code seems too complex to use comfortably; it reads like spaghetti code.

from xml.dom import minidom

print("=== Create XML === ")

# The Document object is the factory for every node created below.
root = minidom.Document()

# <WMWROOT>: top-level element, carrying the default namespace declaration.
wmwroot = root.createElement('WMWROOT')
wmwroot.setAttribute('xmlns', 'http://www.manh.com/ILSNET/Interface')
root.appendChild(wmwroot)

# <WMWDATA>: only child of the root element.
wmwdata = root.createElement('WMWDATA')
wmwroot.appendChild(wmwdata)

# <WMFWUpload>: declares the xsd/xsi prefixes as plain attributes.
wmfwupload = root.createElement('WMFWUpload')
for attribute, uri in (
    ('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema'),
    ('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'),
):
    wmfwupload.setAttribute(attribute, uri)
wmwdata.appendChild(wmfwupload)

# Leaf elements (Date, GroupIndex, Id, ...): each one wraps a single text node.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    node = root.createElement(tag)
    node.appendChild(root.createTextNode(text))
    wmfwupload.appendChild(node)

# Serialize the whole document, indenting nested elements with tabs.
xml_str = root.toprettyxml(indent="\t")
print(xml_str)
print("")

print("=== Parse XML ===")
xml_obj = minidom.parseString(xml_str)
# The pretty-printed text contains whitespace between elements, which the
# parser keeps as text nodes; that is why the elements sit at odd indexes.
root_element = xml_obj.childNodes[0]
data_element = root_element.childNodes[1]
upload_element = data_element.childNodes[1]
id_element = upload_element.childNodes[5]
print("Node {} is Type {}".format(root_element.tagName, type(xml_obj)))
print("NAME:", id_element.tagName)
print("VALUE:", id_element.childNodes[0].data)
print("===================================")
print("Summary:")
print(xml_obj.childNodes)
print(root_element.childNodes)
print(data_element.childNodes)
print(upload_element.childNodes)
print(id_element.tagName)
print(id_element.childNodes)

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f207809b0>]
[<DOM Text node "'\n\t'">, <DOM Element: WMWDATA at 0x7f7f20780910>, <DOM Text node "'\n'">]
[<DOM Text node "'\n\t\t'">, <DOM Element: WMFWUpload at 0x7f7f20780c30>, <DOM Text node "'\n\t'">]
[<DOM Text node "'\n\t\t\t'">, <DOM Element: Date at 0x7f7f20780eb0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: GroupIndex at 0x7f7f207802d0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: Id at 0x7f7f20780190>, <DOM Text node "'\n\t\t\t'">, <DOM Element: More at 0x7f7f20780f50>, <DOM Text node "'\n\t\t'">]
Id
[<DOM Text node "'48fafcd4-f'...">]

Example of XML processing with ElementTree

This approach seems more practical to me in everyday use, and the source code is clearer:

import xml.etree.ElementTree as ET
from xml.dom import minidom

print("=== Create XML === ")

# <WMWROOT>: top-level element, carrying the default namespace declaration.
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

# <WMWDATA>: SubElement creates the child and attaches it in one call.
wmwdata = ET.SubElement(wmwroot, "WMWDATA")

# <WMFWUpload>: declares the xsd/xsi prefixes as plain attributes.
wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")
wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# Leaf elements (Date, GroupIndex, Id, ...). The text is assigned directly on
# the new element; no extra variable is bound, because a chained assignment
# would leave that variable holding the text string instead of the element.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    ET.SubElement(wmfwupload, tag).text = text

# Serialize the tree; minidom is used only to pretty-print the result.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
xml_obj = ET.fromstring(xml_str)
# Parsed tags are namespace-qualified: "{namespace-uri}local-name".
print("Node {} is Type {}".format(xml_obj[0].tag, type(xml_obj)))
print("===================================")
print("Summary:")
# Distinct loop names keep the elements built above (wmwdata, wmfwupload)
# from being rebound to their parsed counterparts.
for parsed_data in xml_obj:
    for parsed_upload in parsed_data:
        for element in parsed_upload:
            print("{} -> {}".format(element.tag, element.text))

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node {http://www.manh.com/ILSNET/Interface}WMWDATA is Type <class 'xml.etree.ElementTree.Element'>
===================================
Summary:
{http://www.manh.com/ILSNET/Interface}Date -> 2022-09-26T10:36:02.6713225Z
{http://www.manh.com/ILSNET/Interface}GroupIndex -> 1
{http://www.manh.com/ILSNET/Interface}Id -> 48fafcd4-f928-4f95-88ca-702ce6139267
{http://www.manh.com/ILSNET/Interface}More -> ...

Example of XML processing with ETree from LXML

I didn’t manage to get the namespace declarations working on the inner nodes.

from lxml import etree as ET
from xml.dom import minidom

print("=== Create XML === ")

# <WMWROOT>: top-level element, carrying the default namespace declaration.
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

# <WMWDATA>: SubElement creates the child and attaches it in one call.
wmwdata = ET.SubElement(wmwroot, "WMWDATA")

# <WMFWUpload>
wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")

# NOT WORKING! (Neither with QName)
# wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
# wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
# NOTE(review): lxml normally takes prefix declarations through the `nsmap`
# argument when the element is created, e.g.
# ET.SubElement(wmwdata, "WMFWUpload", nsmap={'xsd': ..., 'xsi': ...});
# untested here -- confirm against the lxml tutorial before relying on it.

# Leaf elements (Date, GroupIndex, Id, ...). The text is assigned directly on
# the new element; no extra variable is bound, because a chained assignment
# would leave that variable holding the text string instead of the element.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    ET.SubElement(wmfwupload, tag).text = text

# Serialize the tree; minidom is used only to pretty-print the result.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
# xml_str is the serialization from ET.tostring above, not the pretty-printed
# text, so each element is addressed directly by its position among siblings.
xml_obj = minidom.parseString(xml_str)
root_element = xml_obj.childNodes[0]
data_element = root_element.childNodes[0]
upload_element = data_element.childNodes[0]
id_element = upload_element.childNodes[2]
print("Node {} is Type {}".format(root_element.tagName, type(xml_obj)))
print("NAME:", id_element.tagName)
print("VALUE:", id_element.childNodes[0].data)
print("===================================")
print("Summary:")
print(xml_obj.childNodes)
print(root_element.childNodes)
print(data_element.childNodes)
print(upload_element.childNodes)
print(id_element.tagName)
print(id_element.childNodes)

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload>
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f206fccd0>]
[<DOM Element: WMWDATA at 0x7f7f206fc730>]
[<DOM Element: WMFWUpload at 0x7f7f206fcd70>]
[<DOM Element: Date at 0x7f7f206fcc30>, <DOM Element: GroupIndex at 0x7f7f206fceb0>, <DOM Element: Id at 0x7f7f206fca50>, <DOM Element: More at 0x7f7f206fcb90>]
Id
[<DOM Text node "'48fafcd4-f'...">]
Show Comments (0)

Comments