|
Package json_to_relation ::
Module input_source
|
|
1 '''
2 Created on Sep 14, 2013
3
4 @author: paepcke
5 '''
6 import StringIO
7 import bz2
8 import gzip
9 import os
10 import sys
11 from urllib import FancyURLopener
12 import urllib2
13 from urlparse import urlparse
14
15
20
52
53
54
57 if len(urlparse(inFilePathOrURL)[0]) == 0:
58 inFilePathOrURL = 'file://' + inFilePathOrURL
59 self.inFilePathOrURL = inFilePathOrURL
60 self.compression = self.determineCompression(self.inFilePathOrURL)
61
62
63
64
65
66 self.ensureFileLocal(inFilePathOrURL)
67
68 if self.compression == COMPRESSION_TYPE.NO_COMPRESSION:
69 self.fileHandle = urllib2.urlopen(self.localFilePath)
70 elif self.compression == COMPRESSION_TYPE.GZIP:
71 self.fileHandle = gzip.open(self.localFilePath, 'rb')
72 elif self.compression == COMPRESSION_TYPE.BZIP2:
73 self.fileHandle = bz2.BZ2File(self.localFilePath, 'rb')
74
82
84
85 try:
86 (scheme,netloc,path,query,fragment) = self.fileHandle.urlsplit()
87 except AttributeError:
88 self.fileHandle.close()
89 if self.deleteTempFile:
90 try:
91 os.remove(self.localFilePath)
92 except:
93 pass
94
96 '''
97 Given a file path, determine by file extension whether
98 the file is gzip or bzip2 compressed, or whether it is
99 not compressed.
100 @param fileURI: item that str() turns into a file path or URL
101 @type fileURI: STRING
102 '''
103 if str(fileURI).endswith('bz2'):
104 return COMPRESSION_TYPE.BZIP2
105 elif str(fileURI).endswith('gz'):
106 return COMPRESSION_TYPE.GZIP
107 else:
108 return COMPRESSION_TYPE.NO_COMPRESSION
109
111 '''
112 Takes a file path or URL. Sets self.localFilePath
113 to the given path or URL unchanged if the file is
114 uncompressed, or to the local path if it is a compressed local file.
115 If a file is remote and compressed, retrieves
116 the file into a local tmp file and stores that
117 file name in self.localFilePath. In this case the flag
118 self.deleteTempFile is set to True.
119 @param inFilePathOrURL: file path or URL to file
120 @type inFilePathOrURL: String
121 '''
122 self.localFilePath = inFilePathOrURL
123 self.deleteTempFile = False
124 if self.compression == COMPRESSION_TYPE.NO_COMPRESSION:
125 return
126
127 parseResult = urlparse(inFilePathOrURL)
128 if parseResult.scheme == 'file':
129 self.localFilePath = parseResult.path
130 return
131 opener = FancyURLopener()
132
133 self.localFilePath = opener.retrieve(inFilePathOrURL)[0]
134 self.deleteTempFile = True
135
138 self.fileHandle = StringIO.StringIO(inputStr)
139
141 '''
142 No decompression for strings
143 @param line: line of input; returned unchanged
144 @type line: String (presumably; not enforced)
145 '''
146 return line
147
150
def __init__(self, server, pwd, dbName, collName):
    '''
    Remember the connection parameters on the instance, then
    obtain the input handle by calling self.connect().
    @param server: server to connect to
    @param pwd: password used for the connection
    @param dbName: name of the database
    @param collName: name of the collection
    '''
    # Keep all four connection parameters available to connect().
    (self.server,
     self.pwd,
     self.dbName,
     self.collName) = (server, pwd, dbName, collName)
    # connect() is expected to supply the handle that is read from.
    handle = self.connect()
    self.fileHandle = handle
158
160 raise NotImplementedError("MangoDB connector not yet implemented")
161
163 raise NotImplementedError("MangoDB connector not yet implemented")
164
166 raise NotImplementedError("MangoDB connector not yet implemented")
167
170 self.fileHandle = sys.stdin
171
173 '''
174 No decompression for pipes. Pipe through gunzip or similar first.
175 @param line: line of input; returned unchanged
176 @type line: String (presumably; not enforced)
177 '''
178 return line
179
182