1
|
# -*- coding: utf-8 -*-
|
2
|
#!/usr/bin/env python
|
3
|
|
4
|
import re
|
5
|
import os
|
6
|
|
7
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "calebasse.settings")
|
8
|
|
9
|
import xlrd
|
10
|
from pprint import pprint
|
11
|
|
12
|
from calebasse.ressources.models import School, SchoolType
|
13
|
|
14
|
xls_folder = "/home/jschneider/apps/calebasse/xls"
|
15
|
|
16
|
creche = SchoolType.objects.get(id=1)
|
17
|
maternelle = SchoolType.objects.get(id=2)
|
18
|
primaire = SchoolType.objects.get(id=3)
|
19
|
college = SchoolType.objects.get(id=4)
|
20
|
lycee = SchoolType.objects.get(id=5)
|
21
|
|
22
|
def clean_string(string):
|
23
|
if isinstance(string, float):
|
24
|
string = unicode("%d" % string)
|
25
|
elif not isinstance(string, unicode):
|
26
|
string = unicode(string)
|
27
|
string = re.sub(r'\s+$', u'', string)
|
28
|
string = re.sub(r'^\s+', u'', string)
|
29
|
string = re.sub(r'\s+', u' ', string)
|
30
|
return string
|
31
|
|
32
|
def format_phone(number):
|
33
|
num = unicode("%d" % number)
|
34
|
num = unicode("0%s %s %s %s %s" % \
|
35
|
(num[0], num[1:3], num[3:5], num[5:7], num[7:9]))
|
36
|
return num
|
37
|
|
38
|
def parse_xls(book, mapper, private=False, first_row=1):
|
39
|
values = []
|
40
|
sheet = book.sheet_by_index(0)
|
41
|
for i in range(first_row, sheet.nrows - 1):
|
42
|
value = {'school_type': None}
|
43
|
row = sheet.row(i)
|
44
|
# Name
|
45
|
value['name'] = clean_string(row[mapper['name']].value)
|
46
|
# School Type
|
47
|
school_type = clean_string(row[mapper['school_type']].value)
|
48
|
school_type = school_type.replace('.', '')
|
49
|
if school_type.lower() == u"primaire":
|
50
|
value['school_type'] = primaire
|
51
|
elif school_type.lower() == u"maternelle":
|
52
|
value['school_type'] = maternelle
|
53
|
elif school_type.lower() == u"elémentaire":
|
54
|
value['school_type'] = "elementaire"
|
55
|
elif school_type.lower() in [u"lp", u"lycée", u"sep", u"lycée professionnel"]:
|
56
|
value['school_type'] = lycee
|
57
|
elif school_type.lower() == u"collège":
|
58
|
value['school_type'] = college
|
59
|
else:
|
60
|
print "!!!!! WARNING: school type %r unmapple" % school_type
|
61
|
# Address
|
62
|
value['address'] = clean_string(row[mapper['address']].value)
|
63
|
value['address_complement'] = u""
|
64
|
for j in mapper['address_complement']:
|
65
|
value['address_complement'] += row[j].value + " "
|
66
|
value['address_complement'] = clean_string(value['address_complement'])
|
67
|
value['zip_code'] = clean_string(row[mapper['zip_code']].value)
|
68
|
value['city'] = clean_string(row[mapper['city']].value)
|
69
|
if isinstance(row[mapper['phone']].value, unicode) or isinstance(row[mapper['phone']].value, str):
|
70
|
value['phone'] = clean_string(row[mapper['phone']].value)
|
71
|
else:
|
72
|
type(row[mapper['phone']].value)
|
73
|
value['phone'] = format_phone(row[mapper['phone']].value)
|
74
|
value['director_name'] = u""
|
75
|
for j in mapper['director_name']:
|
76
|
value['director_name'] += row[j].value + " "
|
77
|
value['director_name'] = clean_string(value['director_name'])
|
78
|
value['private'] = private
|
79
|
|
80
|
print "--------------"
|
81
|
print value
|
82
|
print "--------------"
|
83
|
|
84
|
if value['name']:
|
85
|
values.append(values)
|
86
|
|
87
|
return values
|
88
|
|
89
|
# Premier degre publique
|
90
|
ecoles_pub = os.path.join(xls_folder, '1d_pub_ecoles.xls')
|
91
|
print "Parsing %r ..." % ecoles_pub
|
92
|
book = xlrd.open_workbook(ecoles_pub)
|
93
|
mapper = {
|
94
|
'school_type': 0,
|
95
|
'city': 1,
|
96
|
'name': 2,
|
97
|
'address': 4,
|
98
|
'zip_code': 5,
|
99
|
'phone': 6,
|
100
|
'director_name': [7],
|
101
|
'address_complement': [],
|
102
|
}
|
103
|
|
104
|
parse_xls(book, mapper)
|
105
|
|
106
|
# Premier degre privee
|
107
|
ecoles_priv = os.path.join(xls_folder, '1d_priv_ecoles.xls')
|
108
|
print "Parsing %r ..." % ecoles_priv
|
109
|
book = xlrd.open_workbook(ecoles_priv)
|
110
|
book = xlrd.open_workbook(ecoles_priv)
|
111
|
parse_xls(book, mapper, True, 4)
|
112
|
|
113
|
# Second degre publique
|
114
|
xls = os.path.join(xls_folder, '2d_pub.xls')
|
115
|
print "Parsing %r ..." % xls
|
116
|
book = xlrd.open_workbook(xls)
|
117
|
mapper = {
|
118
|
'city': 2,
|
119
|
'school_type': 3,
|
120
|
'name': 4,
|
121
|
'address': 5,
|
122
|
'address_complement': [6, 7],
|
123
|
'zip_code': 8,
|
124
|
'director_name': [10, 9],
|
125
|
'phone': 15,
|
126
|
}
|
127
|
parse_xls(book, mapper)
|
128
|
|
129
|
# Second degre privee
|
130
|
xls = os.path.join(xls_folder, '2d_priv.xls')
|
131
|
print "Parsing %r ..." % xls
|
132
|
book = xlrd.open_workbook(xls)
|
133
|
parse_xls(book, mapper, True)
|