|
Package json_to_relation ::
Module generic_json_parser
|
|
1 '''
2 Created on Sep 23, 2013
3
4 @author: paepcke
5 '''
6 import StringIO
7 import ijson
8 import re
9
10 from col_data_type import ColDataType
11
12
class GenericJSONParser(object):
    '''
    Takes a JSON string, and returns a CSV row for later import into a
    relational database.

    NOTE(review): the 'class' line and several 'def' lines of this class
    were missing from the listing this code was recovered from. The base
    class 'object' and the method names processOneJSONObject and
    decArrayIndex are assumptions; confirm them against the callers.
    '''

    # Matches a whole 'item' path component followed by the final,
    # dot-free component of a label, e.g. the 'item.name' tail of
    # 'employee.item.name'. The lookbehind restricts the match to a
    # component boundary, so that keys which merely end in 'item'
    # (such as 'lineitem.price') are not truncated.
    REMOVE_ITEM_FROM_STRING_PATTERN = re.compile(r'(?<![^.])(item)\.([^.]*$)')

    def __init__(self, jsonToRelationConverter):
        '''
        Constructor

        @param jsonToRelationConverter: JSONToRelation instance
        @type jsonToRelationConverter: JSONToRelation
        '''
        self.jsonToRelationConverter = jsonToRelationConverter

    def processOneJSONObject(self, jsonStr, row):
        '''
        Given a JSON string that is one entire JSON object, parse the
        string with ijson. Derive relational column names from the
        (possibly nested) labels. Cooperate with the parent JSONToRelation
        instance to build a schema of typed SQL columns. Fill the passed-in
        row with values from the JSON string. The parser delivers the
        following (event, value) pairs::
            ('null', None)
            ('boolean', <True or False>)
            ('number', <int or Decimal>)
            ('string', <unicode>)
            ('map_key', <str>)
            ('start_map', None)
            ('end_map', None)
            ('start_array', None)
            ('end_array', None)

        Strings are stored as is, booleans as 1/0, numbers as is, and
        null as the empty string. If the converter's schemaHints has no
        entry for a column, the column type defaults to TEXT, SMALLINT,
        DOUBLE, and TEXT, respectively.

        @param jsonStr: string of a single, self contained JSON object
        @type jsonStr: String
        @param row: partially filled array of values.
        @type row: List<<any>>
        @return: the passed-in row, which is filled in place
        @rtype: List<<any>>
        @raise ValueError: if the parser delivers an event that is not
            handled here.
        '''
        converter = self.jsonToRelationConverter
        parser = ijson.parse(StringIO.StringIO(jsonStr))

        # One entry per array that is currently open. Each entry is the
        # index used to tell apart the columns of successive objects
        # within that array; it starts at -1 and is incremented each
        # time a new object (start_map) begins inside the array.
        arrayIndexStack = Stack()

        for nestedLabel, event, value in parser:

            if event == "start_map":
                if not arrayIndexStack.empty():
                    # New object within an array: its columns need a
                    # new number.
                    self.incArrayIndex(arrayIndexStack)
                continue

            # Events that carry no column information:
            if (len(nestedLabel) == 0) or event in ("map_key", "end_map"):
                continue

            if not arrayIndexStack.empty():
                # Inside an array: drop the 'item' component from the
                # label, and append the current array index instead.
                # NOTE(review): scalar array elements (labels ending in
                # '.item') never pass through start_map, so all elements
                # of such an array appear to share the column
                # '<label>_-1'; confirm that this is intended.
                nestedLabel = (self.removeItemPartOfString(nestedLabel) +
                               '_' +
                               str(arrayIndexStack.top(exceptionOnEmpty=True)))

            nestedLabel = converter.ensureLegalIdentifierChars(nestedLabel)

            # A schema hint, if present, overrides the default column type:
            try:
                colDataType = converter.schemaHints[nestedLabel]
            except KeyError:
                colDataType = None

            scalarSpec = self._scalarColumnSpec(event, value)
            if scalarSpec is not None:
                (defaultColType, colValue) = scalarSpec
                if colDataType is None:
                    colDataType = defaultColType
                converter.ensureColExistence(nestedLabel, colDataType)
                self.setValInRow(row, nestedLabel, colValue)
                continue

            if event == "start_array":
                # -1 so that the first object in the array gets index 0:
                arrayIndexStack.push(-1)
                continue

            if event == "end_array":
                arrayIndexStack.pop()
                continue

            raise ValueError("Unknown JSON value type at %s for value %s (ijson event: %s)" % (nestedLabel,value,str(event)))
        return row

    def _scalarColumnSpec(self, event, value):
        '''
        Map a scalar parser event to the column type that is used when no
        schema hint exists, and to the value that is stored in the row.

        @param event: event name delivered by the parser
        @type event: String
        @param value: value delivered along with the event
        @type value: <any>
        @return: tuple (default column type, value to store), or None if
            the event is not one of string, boolean, number, or null.
        @rtype: (ColDataType, <any>)
        '''
        if event == "string":
            return (ColDataType.TEXT, value)
        if event == "boolean":
            return (ColDataType.SMALLINT, 1 if value else 0)
        if event == "number":
            return (ColDataType.DOUBLE, value)
        if event == "null":
            return (ColDataType.TEXT, '')
        return None

    def setValInRow(self, theRow, colName, value):
        '''
        Given a column name, a value and a partially filled row,
        place the value into the row at the column's position. Uses
        the JSONToRelation instance passed to __init__() to obtain the
        column's position in the current schema. If the row is shorter
        than that position, the gap is padded with the string 'null'.
        This includes an empty row whose first value belongs to a
        column other than the first one.

        @param theRow: list of values in their proper column positions
        @type theRow: List<<any>>
        @param colName: name of column into which value is to be inserted.
        @type colName: String
        @param value: the field value
        @type value: <any>, as per ColDataType
        @return: the passed-in row, which is modified in place
        @rtype: List<<any>>
        @raise KeyError: if colName is not in the converter's cols.
        '''
        targetPos = self.jsonToRelationConverter.cols[colName].colPos
        rowLen = len(theRow)

        # Value goes into an already existing column:
        if rowLen > targetPos:
            theRow[targetPos] = value
            return theRow

        # Value goes at or beyond the end of the row: pad the not yet
        # existing columns (none if targetPos == rowLen), then append.
        theRow.extend(['null'] * (targetPos - rowLen))
        theRow.append(value)
        return theRow

    def incArrayIndex(self, arrayIndexStack):
        '''
        Add one to the number on top of the given stack.

        @param arrayIndexStack: stack whose top element is to be incremented
        @type arrayIndexStack: Stack
        '''
        arrayIndexStack.push(arrayIndexStack.pop() + 1)

    def decArrayIndex(self, arrayIndexStack):
        '''
        Subtract one from the number on top of the given stack.

        @param arrayIndexStack: stack whose top element is to be decremented
        @type arrayIndexStack: Stack
        '''
        arrayIndexStack.push(arrayIndexStack.pop() - 1)

    def removeItemPartOfString(self, label):
        '''
        Given a label, like employee.item.name, remove the 'item'
        component that immediately precedes the last component:
        employee.item.name becomes employee.name. Labels without such
        a component are returned unchanged.

        @param label: string from which the 'item' component is to be removed
        @type label: String
        @return: label without the 'item' component
        @rtype: String
        '''
        match = GenericJSONParser.REMOVE_ITEM_FROM_STRING_PATTERN.search(label)
        if match is None:
            return label
        # Everything before 'item', plus the final component; this
        # drops 'item' and the dot that follows it.
        return label[:match.start(1)] + match.group(2)
214
class Stack(object):
    '''
    Minimal last-in, first-out stack, backed by a Python list.

    NOTE(review): the 'class' line, the constructor, and the 'def' lines
    of empty(), pop(), and stackHeight() were missing from the listing
    this code was recovered from. The base class 'object' and the method
    name stackHeight are assumptions; confirm them against the callers.
    '''

    def __init__(self):
        '''
        Create an empty stack.
        '''
        # The top of the stack is the end of the list.
        self.stackArray = []

    def empty(self):
        '''
        @return: True if the stack holds no items, else False
        @rtype: Boolean
        '''
        return len(self.stackArray) == 0

    def push(self, item):
        '''
        Place an item on top of the stack.

        @param item: the item to add
        @type item: <any>
        '''
        self.stackArray.append(item)

    def pop(self):
        '''
        Remove the item on top of the stack, and return it.

        @return: the item that was on top of the stack
        @rtype: <any>
        @raise ValueError: if the stack is empty.
        '''
        try:
            return self.stackArray.pop()
        except IndexError:
            raise ValueError("Stack empty.")

    def top(self, exceptionOnEmpty=False):
        '''
        Return the item on top of the stack without removing it.

        @param exceptionOnEmpty: if True, an empty stack raises ValueError;
            if False, an empty stack yields None.
        @type exceptionOnEmpty: Boolean
        @return: the item on top of the stack, or None if the stack is
            empty and exceptionOnEmpty is False.
        @rtype: <any>
        @raise ValueError: if the stack is empty and exceptionOnEmpty is True.
        '''
        if len(self.stackArray) == 0:
            if exceptionOnEmpty:
                raise ValueError("Call to Stack instance method 'top' when stack is empty.")
            return None
        return self.stackArray[-1]

    def stackHeight(self):
        '''
        @return: number of items currently on the stack
        @rtype: int
        '''
        return len(self.stackArray)
242