17 |
17 |
import io
|
18 |
18 |
import os
|
19 |
19 |
import re
|
|
20 |
import sys
|
20 |
21 |
import csv
|
21 |
22 |
from collections import OrderedDict
|
22 |
23 |
|
... | ... | |
46 |
47 |
|
47 |
48 |
# Module-level ordered mapping used as a code cache.
# NOTE(review): presumably filled by get_code(), whose comment mentions
# limiting the code cache to 1024 entries -- confirm against its body.
code_cache = OrderedDict()
|
48 |
49 |
|
|
50 |
# Backport of https://bugs.python.org/issue30157
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    Parameters:
        data: the text sample to inspect.
        delimiters: ``None`` to accept any candidate delimiter, otherwise
            a string/container; only candidates that are ``in`` it are
            counted.

    Returns:
        A tuple ``(quotechar, doublequote, delimiter, skipinitialspace)``.
        When no quoted text is found the tuple is ``('', False, None, 0)``.
    """
    # Patterns are tried from most to least specific; the first one that
    # yields any match is the one whose group layout is used below.
    # Raw strings keep ``\w`` from being an invalid string escape.
    patterns = (
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',  # ,".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',    # ".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',    # ,".*?"
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',                              # ".*?" (no delim, no space)
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = regexp.findall(data)
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    quotes = {}
    delims = {}
    spaces = 0
    # findall() returns tuples ordered by group number (or plain strings
    # when the pattern has a single group), so a group's position in each
    # match is its 1-based group index minus one.
    groupindex = regexp.groupindex
    for m in matches:
        n = groupindex['quote'] - 1
        key = m[n]
        if key:
            quotes[key] = quotes.get(key, 0) + 1
        try:
            n = groupindex['delim'] - 1
            key = m[n]
        except KeyError:
            # The winning pattern has no delimiter group.
            continue
        if key and (delimiters is None or key in delimiters):
            delims[key] = delims.get(key, 0) + 1
        try:
            n = groupindex['space'] - 1
        except KeyError:
            continue
        if m[n]:
            spaces += 1

    # Most frequent candidate wins; on a tie the first key encountered
    # while iterating the dict is kept.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        # Only skip initial space if every counted delimiter had one.
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)

    doublequote = bool(dq_regexp.search(data))

    return (quotechar, doublequote, delim, skipinitialspace)


# Only interpreters that predate the upstream fix are patched:
# Python 2 before 2.7.15 and Python 3 before 3.7.  The check relies on
# sys.version_info alone, which covers the same versions as the previous
# ``six.PY3 and sys.version_info < (3, 7)`` test.
if sys.version_info < (2, 7, 15) or (3,) <= sys.version_info < (3, 7):
    csv.Sniffer._guess_quote_and_delimiter = _guess_quote_and_delimiter
|
|
204 |
|
49 |
205 |
|
50 |
206 |
def get_code(expr):
|
51 |
207 |
# limit size of code cache to 1024
|
52 |
|
-
|