Projet

Général

Profil

Development #4842 » import_school_from_xls_files.py

Jérôme Schneider, 18 juillet 2014 18:36

 
1
# -*- coding: utf-8 -*-
2
#!/usr/bin/env python
3

    
4
import re
5
import os
6

    
7
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "calebasse.settings")
8

    
9
import xlrd
10
from pprint import pprint
11

    
12
from calebasse.ressources.models import School, SchoolType
13

    
14
xls_folder = "/home/jschneider/apps/calebasse/xls"
15

    
16
creche = SchoolType.objects.get(id=1)
17
maternelle = SchoolType.objects.get(id=2)
18
primaire = SchoolType.objects.get(id=3)
19
college = SchoolType.objects.get(id=4)
20
lycee = SchoolType.objects.get(id=5)
21

    
22
def clean_string(string):
23
    if isinstance(string, float):
24
        string = unicode("%d" % string)
25
    elif not isinstance(string, unicode):
26
        string = unicode(string)
27
    string = re.sub(r'\s+$', u'', string)
28
    string = re.sub(r'^\s+', u'', string)
29
    string = re.sub(r'\s+', u' ', string)
30
    return string
31

    
32
def format_phone(number):
33
    num = unicode("%d" % number)
34
    num = unicode("0%s %s %s %s %s" % \
35
            (num[0], num[1:3], num[3:5], num[5:7], num[7:9]))
36
    return num
37

    
38
def parse_xls(book, mapper, private=False, first_row=1):
39
    values = []
40
    sheet = book.sheet_by_index(0)
41
    for i in range(first_row, sheet.nrows - 1):
42
        value = {'school_type': None}
43
        row = sheet.row(i)
44
        # Name
45
        value['name'] = clean_string(row[mapper['name']].value)
46
        # School Type
47
        school_type = clean_string(row[mapper['school_type']].value)
48
        school_type = school_type.replace('.', '')
49
        if school_type.lower() == u"primaire":
50
            value['school_type'] = primaire
51
        elif school_type.lower() == u"maternelle":
52
            value['school_type'] = maternelle
53
        elif school_type.lower() == u"elémentaire":
54
            value['school_type'] = "elementaire"
55
        elif school_type.lower() in [u"lp", u"lycée", u"sep", u"lycée professionnel"]:
56
            value['school_type'] = lycee
57
        elif school_type.lower() == u"collège":
58
            value['school_type'] = college
59
        else:
60
            print "!!!!! WARNING: school type %r unmapple" % school_type
61
        # Address
62
        value['address'] = clean_string(row[mapper['address']].value)
63
        value['address_complement'] = u""
64
        for j in mapper['address_complement']:
65
            value['address_complement'] += row[j].value + " "
66
        value['address_complement'] = clean_string(value['address_complement'])
67
        value['zip_code'] = clean_string(row[mapper['zip_code']].value)
68
        value['city'] = clean_string(row[mapper['city']].value)
69
        if isinstance(row[mapper['phone']].value, unicode) or isinstance(row[mapper['phone']].value, str):
70
            value['phone'] = clean_string(row[mapper['phone']].value)
71
        else:
72
            type(row[mapper['phone']].value)
73
            value['phone'] = format_phone(row[mapper['phone']].value)
74
        value['director_name'] = u""
75
        for j in mapper['director_name']:
76
            value['director_name'] += row[j].value + " "
77
        value['director_name'] = clean_string(value['director_name'])
78
        value['private'] = private
79

    
80
        print "--------------"
81
        print value
82
        print "--------------"
83
        
84
        if value['name']:
85
            values.append(values)
86

    
87
    return values
88

    
89
# Premier degre publique
90
ecoles_pub = os.path.join(xls_folder, '1d_pub_ecoles.xls')
91
print "Parsing %r ..." % ecoles_pub
92
book =  xlrd.open_workbook(ecoles_pub)
93
mapper = {
94
        'school_type': 0,
95
        'city': 1,
96
        'name': 2,
97
        'address': 4,
98
        'zip_code': 5,
99
        'phone': 6,
100
        'director_name': [7],
101
        'address_complement': [],
102
        }
103

    
104
parse_xls(book, mapper)
105

    
106
# Premier degre privee
107
ecoles_priv = os.path.join(xls_folder, '1d_priv_ecoles.xls')
108
print "Parsing %r ..." % ecoles_priv
109
book = xlrd.open_workbook(ecoles_priv)
110
book =  xlrd.open_workbook(ecoles_priv)
111
parse_xls(book, mapper, True, 4)
112

    
113
# Second degre publique
114
xls = os.path.join(xls_folder, '2d_pub.xls')
115
print "Parsing %r ..." % xls
116
book = xlrd.open_workbook(xls)
117
mapper = {
118
        'city': 2,
119
        'school_type': 3,
120
        'name': 4,
121
        'address': 5,
122
        'address_complement': [6, 7],
123
        'zip_code': 8,
124
        'director_name': [10, 9],
125
        'phone': 15,
126
        }
127
parse_xls(book, mapper)
128

    
129
# Second degre privee
130
xls = os.path.join(xls_folder, '2d_priv.xls')
131
print "Parsing %r ..." % xls
132
book = xlrd.open_workbook(xls)
133
parse_xls(book, mapper, True)