Examples of how to work with JSON, YAML, CSV, and XML files in Python.
Today I found myself preparing some exercises for a student of mine who is learning the Python programming language; last week he asked me about converting a CSV file to XML. So I decided to publish these exercises for anyone who has doubts about how to work with these formats.
Example of JSON processing
import json
# Sample data: a shop and the number of units sold by each worker.
example = {
    "name": "Kiroga",
    "workers": [
        {"name": worker, "sold": units}
        for worker, units in (("Alex", 3), ("Susan", 8), ("Delia", 4))
    ],
}

# Serialize: Python dict -> JSON text (a str).
print("=== From Python to JSON String ===")
json_string = json.dumps(example)
print(f"{type(json_string)} -> {json_string}")
print()

# Deserialize: JSON text -> Python dict.
print("=== From JSON String to Python ===")
obj = json.loads(json_string)
print(f"{type(obj)} -> {obj}")
Result:
=== From Python to JSON String ===
<class 'str'> -> {"name": "Kiroga", "workers": [{"name": "Alex", "sold": 3}, {"name": "Susan", "sold": 8}, {"name": "Delia", "sold": 4}]}
=== From JSON String to Python ===
<class 'dict'> -> {'name': 'Kiroga', 'workers': [{'name': 'Alex', 'sold': 3}, {'name': 'Susan', 'sold': 8}, {'name': 'Delia', 'sold': 4}]}
Example of YAML processing
import yaml
# Sample data mixing integers, a float, ASCII and non-ASCII strings and a
# nested mapping.
incoming_data = [1024, 88, 'á€eVentura', 192834, 1337, 'no data']
names = {'incoming': 'málaga', 'outgoing': 'szczecin', 'days': 4}
example = {
    'incoming data': incoming_data,
    'configuration': '/etc/example/config.conf',
    'aprox pi value': 3.141592654,
    'names': names,
}

# Serialize: Python dict -> YAML text.
print("=== Dictionary to YAML string ===")
yaml_string = yaml.dump(example)
print(yaml_string)
print()

# Deserialize: YAML text -> Python dict. safe_load is used so that the
# document cannot construct arbitrary Python objects.
print("=== YAML string to Dictionary ===")
obj = yaml.safe_load(yaml_string)
print(obj)
Result:
=== Dictionary to YAML string ===
aprox pi value: 3.141592654
configuration: /etc/example/config.conf
incoming data:
- 1024
- 88
- "\xE1\u20ACeVentura"
- 192834
- 1337
- no data
names:
days: 4
incoming: "m\xE1laga"
outgoing: szczecin
=== YAML string to Dictionary ===
{'aprox pi value': 3.141592654, 'configuration': '/etc/example/config.conf', 'incoming data': [1024, 88, 'á€eVentura', 192834, 1337, 'no data'], 'names': {'days': 4, 'incoming': 'málaga', 'outgoing': 'szczecin'}}
Example of CSV processing
import csv
from io import StringIO
# Sample CSV document: a quoted header line followed by four IP-range records.
example = '''"Origin","Target","Code","Country"
"1.0.0.0","1.0.0.255","AU","Australia"
"1.0.1.0","1.0.3.255","CN","China"
"1.0.4.0","1.0.7.255","AU","Australia"
"1.0.128.0","1.0.255.255","TH","Thailand"'''

# csv readers accept any iterable of lines. splitlines() is used instead of
# split("\n") because it also copes with "\r\n" endings and does not produce
# an empty last line when the text ends with a newline (an empty row would
# make the positional format() below fail).
lines = example.splitlines()

print("=== Read as ROW ===")
rows = csv.reader(lines, delimiter=',', quotechar='"')
headers = next(rows)  # consume the header line so only data rows remain
for row in rows:
    # Each row is a list of strings in column order.
    print("Origin:{:>12s} Target:{:>12s} Code:{:2s} Country:{}".format(*row))
print("")

print("=== Read as Dict ===")
rows = csv.DictReader(lines, delimiter=',', quotechar='"')
# Materialize the records so they can be printed here and written back below.
records = list(rows)
for record in records:
    # Each record is a mapping keyed by the column names of the header line.
    print("Origin:{Origin:>12s} Target:{Target:>12s} Code:{Code:2s} Country:{Country}".format(**record))
print("")

print("=== Write CSV ===")
buffer = StringIO()
# Reuse the column names and records parsed above instead of typing them
# again by hand, so the written CSV cannot drift from the input data.
header = rows.fieldnames
writer = csv.DictWriter(buffer, fieldnames=header)
writer.writeheader()
writer.writerows(records)
print(buffer.getvalue())
Result:
=== Read as ROW ===
Origin: 1.0.0.0 Target: 1.0.0.255 Code:AU Country:Australia
Origin: 1.0.1.0 Target: 1.0.3.255 Code:CN Country:China
Origin: 1.0.4.0 Target: 1.0.7.255 Code:AU Country:Australia
Origin: 1.0.128.0 Target: 1.0.255.255 Code:TH Country:Thailand
=== Read as Dict ===
Origin: 1.0.0.0 Target: 1.0.0.255 Code:AU Country:Australia
Origin: 1.0.1.0 Target: 1.0.3.255 Code:CN Country:China
Origin: 1.0.4.0 Target: 1.0.7.255 Code:AU Country:Australia
Origin: 1.0.128.0 Target: 1.0.255.255 Code:TH Country:Thailand
=== Write CSV ===
Origin,Target,Code,Country
1.0.0.0,1.0.0.255,AU,Australia
1.0.1.0,1.0.3.255,CN,China
1.0.4.0,1.0.7.255,AU,Australia
1.0.128.0,1.0.255.255,TH,Thailand
Example of XML processing with Minidom
To me, the resulting source code seems too complex to be practical; it reads like spaghetti code.
from xml.dom import minidom
print("=== Create XML === ")

# The Document object acts as the factory for every node of the tree.
root = minidom.Document()

# <WMWROOT xmlns="..."> is the document element.
wmwroot = root.createElement('WMWROOT')
wmwroot.setAttribute('xmlns', 'http://www.manh.com/ILSNET/Interface')
root.appendChild(wmwroot)

# <WMWDATA> hangs from the document element.
wmwdata = root.createElement('WMWDATA')
wmwroot.appendChild(wmwdata)

# <WMFWUpload> carries the two schema prefix declarations.
wmfwupload = root.createElement('WMFWUpload')
for attribute, uri in (
    ('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema'),
    ('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'),
):
    wmfwupload.setAttribute(attribute, uri)
wmwdata.appendChild(wmfwupload)

# Leaf elements: each one wraps a single text node.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    node = root.createElement(tag)
    node.appendChild(root.createTextNode(text))
    wmfwupload.appendChild(node)

# Serialize the tree as tab-indented text.
xml_str = root.toprettyxml(indent="\t")
print(xml_str)
print("")

print("=== Parse XML ===")
xml_obj = minidom.parseString(xml_str)

# The pretty-printed text contains whitespace between tags, which the parser
# keeps as text nodes; that is why the elements sit at indexes 1 and 5 and
# not at 0 and 2.
root_element = xml_obj.childNodes[0]
data_element = root_element.childNodes[1]
upload_element = data_element.childNodes[1]
id_element = upload_element.childNodes[5]

print(f"Node {root_element.tagName} is Type {type(xml_obj)}")
print("NAME:", id_element.tagName)
print("VALUE:", id_element.childNodes[0].data)
print("===================================")
print("Summary:")
# Walk down the tree one level at a time, showing the children of each level.
for children in (
    xml_obj.childNodes,
    root_element.childNodes,
    data_element.childNodes,
    upload_element.childNodes,
):
    print(children)
print(id_element.tagName)
print(id_element.childNodes)
Result:
=== Create XML ===
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
<WMWDATA>
<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Date>2022-09-26T10:36:02.6713225Z</Date>
<GroupIndex>1</GroupIndex>
<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
<More>...</More>
</WMFWUpload>
</WMWDATA>
</WMWROOT>
=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f207809b0>]
[<DOM Text node "'\n\t'">, <DOM Element: WMWDATA at 0x7f7f20780910>, <DOM Text node "'\n'">]
[<DOM Text node "'\n\t\t'">, <DOM Element: WMFWUpload at 0x7f7f20780c30>, <DOM Text node "'\n\t'">]
[<DOM Text node "'\n\t\t\t'">, <DOM Element: Date at 0x7f7f20780eb0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: GroupIndex at 0x7f7f207802d0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: Id at 0x7f7f20780190>, <DOM Text node "'\n\t\t\t'">, <DOM Element: More at 0x7f7f20780f50>, <DOM Text node "'\n\t\t'">]
Id
[<DOM Text node "'48fafcd4-f'...">]
Example of XML processing with ElementTree
I find ElementTree more practical to use, and the resulting source code is clearer:
import xml.etree.ElementTree as ET
from xml.dom import minidom
print("=== Create XML === ")

# Root element. "xmlns" is stored here as an ordinary attribute and written
# out verbatim; once the text is parsed again, every tag is reported inside
# that namespace (see the "{uri}tag" names printed by the parse section).
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

# SubElement() creates the child and attaches it to its parent in one call.
wmwdata = ET.SubElement(wmwroot, "WMWDATA")

wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")
wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# Leaf elements with their text. The text is assigned directly on the new
# element; no variable is bound, because a chained assignment such as
# `node = ET.SubElement(...).text = "..."` would bind `node` to the string,
# not to the element.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    ET.SubElement(wmfwupload, tag).text = text

# tostring() returns compact XML; minidom is used only to pretty-print it.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
xml_obj = ET.fromstring(xml_str)
print("Node {} is Type {}".format(xml_obj[0].tag, type(xml_obj)))
print("===================================")
print("Summary:")
# Iterating an Element yields its direct children. The loop variables use
# their own names so the elements built above (wmwdata, wmfwupload) are not
# overwritten by the parsed ones.
for data_element in xml_obj:
    for upload_element in data_element:
        for element in upload_element:
            print("{} -> {}".format(element.tag, element.text))
Result:
=== Create XML ===
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
<WMWDATA>
<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Date>2022-09-26T10:36:02.6713225Z</Date>
<GroupIndex>1</GroupIndex>
<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
<More>...</More>
</WMFWUpload>
</WMWDATA>
</WMWROOT>
=== Parse XML ===
Node {http://www.manh.com/ILSNET/Interface}WMWDATA is Type <class 'xml.etree.ElementTree.Element'>
===================================
Summary:
{http://www.manh.com/ILSNET/Interface}Date -> 2022-09-26T10:36:02.6713225Z
{http://www.manh.com/ILSNET/Interface}GroupIndex -> 1
{http://www.manh.com/ILSNET/Interface}Id -> 48fafcd4-f928-4f95-88ca-702ce6139267
{http://www.manh.com/ILSNET/Interface}More -> ...
Example of XML processing with ETree from LXML
I did not manage to get the namespace declarations working on the inner nodes.
from lxml import etree as ET
from xml.dom import minidom
print("=== Create XML === ")

# Root element carrying the default namespace as an "xmlns" attribute.
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

wmwdata = ET.SubElement(wmwroot, "WMWDATA")

# Declaring the prefixes with
#     wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
#     wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
# does NOT work with lxml (neither with QName), so this element is created
# without them.
# NOTE(review): lxml documents an `nsmap` argument on Element()/SubElement()
# for declaring prefixes at creation time; it is not applied here so that
# the output stays as published. Confirm before changing.
wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")

# Leaf elements with their text. The text is assigned directly on the new
# element; no variable is bound, because a chained assignment such as
# `node = ET.SubElement(...).text = "..."` would bind `node` to the string,
# not to the element.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    ET.SubElement(wmfwupload, tag).text = text

# tostring() output is re-parsed with minidom only to pretty-print it.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
# xml_str (not the pretty-printed text) is parsed here, hence the plain
# 0-based element indexes used below.
xml_obj = minidom.parseString(xml_str)
root_element = xml_obj.childNodes[0]
data_element = root_element.childNodes[0]
upload_element = data_element.childNodes[0]
id_element = upload_element.childNodes[2]

print("Node {} is Type {}".format(root_element.tagName, type(xml_obj)))
print("NAME:", id_element.tagName)
print("VALUE:", id_element.childNodes[0].data)
print("===================================")
print("Summary:")
print(xml_obj.childNodes)
print(root_element.childNodes)
print(data_element.childNodes)
print(upload_element.childNodes)
print(id_element.tagName)
print(id_element.childNodes)
Result:
=== Create XML ===
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
<WMWDATA>
<WMFWUpload>
<Date>2022-09-26T10:36:02.6713225Z</Date>
<GroupIndex>1</GroupIndex>
<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
<More>...</More>
</WMFWUpload>
</WMWDATA>
</WMWROOT>
=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f206fccd0>]
[<DOM Element: WMWDATA at 0x7f7f206fc730>]
[<DOM Element: WMFWUpload at 0x7f7f206fcd70>]
[<DOM Element: Date at 0x7f7f206fcc30>, <DOM Element: GroupIndex at 0x7f7f206fceb0>, <DOM Element: Id at 0x7f7f206fca50>, <DOM Element: More at 0x7f7f206fcb90>]
Id
[<DOM Text node "'48fafcd4-f'...">]