Project

General

Profile

Download (21.7 KB) Statistics
| Branch: | Tag: | Revision:

calebasse / scripts / import_rs.py @ b287d07e

1
# -*- coding: utf-8 -*-
2
#!/usr/bin/env python
3

    
4
import os
5
import csv
6

    
7
from datetime import datetime, date
8

    
9
import calebasse.settings
10
import django.core.management
11

    
12
django.core.management.setup_environ(calebasse.settings)
13

    
14
from django.db import transaction
15
from calebasse.agenda.models import EventWithAct
16
from calebasse.dossiers.models import PatientRecord
17
from calebasse.personnes.models import Worker
18
from calebasse.ressources.models import Service
19
from calebasse.ressources.models import ActType
20
from calebasse.actes.models import Act, ActValidationState
21

    
22
from import_dossiers import map_cs
23

    
24
# Configuration
25
db_path = "./scripts/20121221-192258"
26

    
27
dbs = ["F_ST_ETIENNE_SESSAD_TED", "F_ST_ETIENNE_CMPP", "F_ST_ETIENNE_CAMSP", "F_ST_ETIENNE_SESSAD"]
28
# dbs = ["F_ST_ETIENNE_SESSAD_TED"]
29

    
30
def _to_datetime(str_date):
31
    if not str_date:
32
        return None
33
    return datetime.strptime(str_date[:19], "%Y-%m-%d %H:%M:%S")
34

    
35
def _to_date(str_date):
36
    dt = _to_datetime(str_date)
37
    return dt and dt.date()
38

    
39
def _to_time(str_date):
40
    dt = _to_datetime(str_date)
41
    return dt and dt.time()
42

    
43
def _to_duration(str_date):
44
    dt = _to_datetime(str_date)
45
    if dt is None:
46
        return None
47
    return dt - datetime(1900, 01, 01, 0, 0)
48

    
49
def _to_int(str_int):
50
    if not str_int:
51
        return None
52
    return int(str_int)
53

    
54
def _get_dict(cols, line):
55
    """"""
56
    res = {}
57
    for i, data in enumerate(line):
58
        res[cols[i]] = data.decode('utf-8')
59
    return res
60

    
61
def batch_delete(qs, limit):
62
    count = qs.count()
63
    i = 0
64
    while i < count:
65
        ids = qs[i:i+limit].values_list('pk', flat=True)
66
        qs.filter(pk__in=ids).delete()
67
        i += limit
68

    
69

    
70
PERIOD_FAURE_NOUS = {1 : 1,
71
2 : 2,
72
3 : 3,
73
4 : 4,
74
5: 6,
75
6: 7,
76
7: 8,
77
8: 9,
78
9: None,
79
10: 10,
80
12: 11,
81
13: 12,
82
}
83

    
84
JOURS = {1: 'lundi',
85
2: 'mardi',
86
3: 'mercredi',
87
4: 'jeudi',
88
5: 'vendredi'
89
}
90

    
91
dic_worker = {}
92

    
93
def load_csv(db, name, offset=0, limit=9999999, id_column=0):
94
    records = []
95
    idx = {}
96

    
97
    csvfile = open(os.path.join(db_path, db, name + '.csv'), 'rb')
98
    csvlines = csv.reader(csvfile, delimiter=';', quotechar='|')
99
    cols = csvlines.next()
100
    i = 0
101
    for line in csvlines:
102
        if not (offset <= int(line[id_column]) < offset+limit):
103
            continue
104
        data = _get_dict(cols, line)
105
        records.append(data)
106
        idx[data['id']] = i
107
        i += 1
108
    csvfile.close()
109
    return records, idx, cols
110

    
111
def add_invalid(d, reason):
112
    d.setdefault('invalid', '')
113
    if d['invalid']:
114
        d['invalid'] += ', '
115
    d['invalid'] += reason
116

    
117
def main():
118
    """ """
119

    
120
    workers = Worker.objects.all()
121
    invalid_rs_csv = open('./scripts/invalid_rs.csv', 'wb+')
122
    invalid_rs_writer = csv.writer(invalid_rs_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
123
    invalid_rr_csv = open('./scripts/invalid_rr.csv', 'wb+')
124
    invalid_rr_writer = csv.writer(invalid_rr_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
125
    invalid_actes_csv = open('./scripts/invalid_actes.csv', 'wb+')
126
    invalid_actes_writer = csv.writer(invalid_actes_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
127
    for db in dbs:
128
        workers_idx = {}
129
        act_types_idx = {}
130
        if "F_ST_ETIENNE_CMPP" == db:
131
            service = Service.objects.get(name="CMPP")
132
        elif "F_ST_ETIENNE_CAMSP" == db:
133
            service = Service.objects.get(name="CAMSP")
134
        elif "F_ST_ETIENNE_SESSAD_TED" == db:
135
            service = Service.objects.get(name="SESSAD TED")
136
        elif "F_ST_ETIENNE_SESSAD" == db:
137
            service = Service.objects.get(name="SESSAD DYS")
138

    
139
        EventWithAct.objects.filter(services=service).delete()
140
        print '===', service.name, '==='
141
        print datetime.now()
142

    
143
        rs_data, rs_idx, rs_cols = load_csv(db, 'rs')
144
        rr_data, rr_idx, rr_cols = load_csv(db, 'rr')
145
        rs_cols.extend(['workers', 'invalid'])
146
        rr_cols.extend(['workers', 'invalid'])
147
        invalid_rs_writer.writerow(map(lambda x: x.encode('utf-8'), rs_cols))
148
        invalid_rr_writer.writerow(map(lambda x: x.encode('utf-8'), rr_cols))
149

    
150
        print "%s - Nombre de rdv : %d" % (service.name, len(rs_data))
151
        print u"%s - Nombre de rdv récurrents : %d" % (service.name, len(rr_data))
152

    
153
        rs_details_data, _, _ = load_csv(db, 'details_rs')
154
        rr_details_data, _, _ = load_csv(db, 'details_rr')
155
        for i, worker in enumerate(workers):
156
            if service.name == 'CMPP':
157
                j = worker.old_cmpp_id
158
            elif service.name == 'CAMSP':
159
                j = worker.old_camsp_id
160
            elif service.name == 'SESSAD DYS':
161
                j = worker.old_sessad_dys_id
162
            elif service.name == 'SESSAD TED':
163
                j = worker.old_sessad_ted_id
164
            else:
165
                print "service inconnu!!!"
166
                exit(0)
167
            if j:
168
                workers_idx[j] = worker
169

    
170
        act_types = ActType.objects.for_service(service)
171
        act_type_id_not_found = set()
172
        for i, act_type in enumerate(act_types):
173
            j = act_type.old_id
174
            if j:
175
                act_types_idx[j] = act_type
176
            else:
177
                act_type_id_not_found.add(act_type)
178

    
179
        def set_act_type(row, not_found=None):
180
            act_type_id = row['type_acte']
181
            if act_type_id == '0':
182
                add_invalid(row, 'no act_id=>not importable')
183
            elif act_type_id in act_types_idx:
184
                row['act_type'] = act_types_idx[act_type_id]
185
            else:
186
                add_invalid(row, 'act_type not found %s' % act_type_id)
187
                if not_found:
188
                    not_found.add(act_type_id)
189

    
190
        def handle_details(data, idx, details, id_key):
191
            not_found = set()
192
            id_not_found = set()
193
            for detail in details:
194
                i = detail[id_key]
195
                thera_id = detail['thera_id']
196
                if i not in idx:
197
                    id_not_found.add(i)
198
                    continue
199
                row = data[idx[i]]
200
                if thera_id in workers_idx:
201
                    workers = row.setdefault('workers', set())
202
                    workers.add(workers_idx[thera_id])
203
                else:
204
                    add_invalid(row, 'unknown thera_id %s' % thera_id)
205
                    not_found.add(thera_id)
206

    
207
            print "%s - Liste des worker not found : %s" % (service.name, str(set(not_found)))
208
            print "%s - Liste des details pour des RS/RR not found : %s" % (service.name, str(set(id_not_found)))
209

    
210
        print ' - Traitement rs_detail....'
211
        handle_details(rs_data, rs_idx, rs_details_data, 'rs_id')
212
        print ' - Traitement rr_detail....'
213
        handle_details(rr_data, rr_idx, rr_details_data, 'rr_id')
214
        print "%s - Nombre de types d'actes : %d" % (service.name, len(act_types))
215
        print "%s - Liste des types d'actes sans id : %s" % (service.name, str(act_type_id_not_found))
216

    
217
        enfant_idx = {}
218
        for enfant in PatientRecord.objects.filter(service=service):
219
            enfant_idx[enfant.old_id] = enfant
220

    
221
        def set_enfant(row, not_found=None):
222
            # connect enfant
223
            enfant_id = row['enfant_id']
224
            if enfant_id == '0':
225
                add_invalid(row, 'no enfant_id=>not an appointment')
226
                row['event'] = True
227
            elif enfant_id in enfant_idx:
228
                row['enfant'] = enfant_idx[enfant_id]
229
            else:
230
                add_invalid(row, 'enfant_id not found %s' % enfant_id)
231
                if not_found:
232
                    not_found.add(enfant_id)
233

    
234
        enfant_not_found = set()
235
        rr_not_found = set()
236
        rs_without_act_type = set()
237
        unknown_act_type_id = set()
238
        invalid_rs = set()
239
        seen_exceptions = dict()
240
        for rs in rs_data:
241
            rs.setdefault('workers', set())
242
            rs['exception'] = False
243
            rs['event'] = False
244
            rs['date'] = _to_date(rs['date_rdv'])
245
            # connect enfant
246
            enfant_id = rs['enfant_id']
247
            if enfant_id == '0':
248
                add_invalid(rs, 'no enfant_id=>not an appointment')
249
                rs['event'] = True
250
            elif enfant_id in enfant_idx:
251
                rs['enfant'] = enfant_idx[enfant_id]
252
            else:
253
                add_invalid(rs, 'enfant_id not found %s' % enfant_id)
254
                enfant_not_found.add(enfant_id)
255
            # connect rr
256
            rr_id = rs['rr_ev_id']
257
            if rr_id:
258
                if rr_id.startswith('ev_'):
259
                    rs['ok'] = False
260
                elif rr_id.startswith('rr_'):
261
                    _, rr_id = rr_id.split('_')
262
                    if rr_id in rr_idx:
263
                        if (rr_id, rs['date']) not in seen_exceptions:
264
                            seen_exceptions[(rr_id, rs['date'])] = rs['id']
265
                            exceptions = rr_data[rr_idx[rr_id]].setdefault('exceptions', [])
266
                            exceptions.append(rs)
267
                            rs['exception'] = True
268
                        else:
269
                            add_invalid(rs, 'already another exception on the same day: %s' %
270
                                    seen_exceptions[(rr_id, rs['date'])])
271
                    else:
272
                        add_invalid(rs, 'rr_id not found %s' % rr_id)
273
                        rr_not_found.add(rr_id)
274
                else:
275
                    add_invalid(rs, 'rr_id invalid %s' % rr_id)
276
            rs['time'] = _to_time(rs['heure'])
277
            rs['duration'] = _to_duration(rs['duree'])
278
            rs['start_datetime'] = datetime.combine(rs['date'], rs['time'])
279
            rs['end_datetime'] = rs['start_datetime'] + rs['duration']
280
            act_type_id = rs['type_acte']
281
            if act_type_id == '0' and rs['enfant_id'] == '0':
282
                add_invalid(rs, 'no act_id=>not an appointment')
283
                rs['event'] = True
284
            elif act_type_id:
285
                if act_type_id in act_types_idx:
286
                    rs['act_type'] = act_types_idx[act_type_id]
287
                else:
288
                    add_invalid(rs, 'act_type not found %s' % act_type_id)
289
                    unknown_act_type_id.add(act_type_id)
290
            else:
291
                raise NotImplemented
292
            if rs.get('invalid') and not rs['event']:
293
                invalid_rs.add(rs['id'])
294
                invalid_rs_writer.writerow([ unicode(rs[col]).encode('utf-8') for col in rs_cols ])
295

    
296

    
297
        print "%s - Liste des enfants not found : %s" % (service.name, str(enfant_not_found))
298
        print "%s - Liste des RR not found : %s" % (service.name, str(rr_not_found))
299
        print "%s - Liste des RS sans type d'acte : %s" % (service.name, str(rs_without_act_type))
300
        print "%s - Liste des types d'actes inconnus : %s" % (service.name, str(unknown_act_type_id))
301
        print "%s - Liste des RS invalides : %s" % (service.name, len(invalid_rs))
302

    
303
        enfant_not_found = set()
304
        rr_not_found = set()
305
        rs_without_act_type = set()
306
        unknown_act_type_id = set()
307
        invalid_rs = set()
308
        for rr in rr_data:
309
            rs.setdefault('workers', set())
310
            rr.setdefault('exceptions', [])
311
            rr['start_date'] = _to_date(rr['date_debut'])
312
            rr['end_date'] = _to_date(rr['date_fin'])
313
            if rr['end_date'] and rr['start_date'] > rr['end_date']:
314
                add_invalid(rr, 'date_fin < date_debut')
315
            rr['time'] = _to_time(rr['heure'])
316
            rr['duration'] = _to_duration(rr['duree'])
317
            rr['start_datetime'] = datetime.combine(rr['start_date'], rr['time'])
318
            rr['end_datetime'] = rr['start_datetime'] + rr['duration']
319
            # connect rythme
320
            rr['rythme'] = int(rr['rythme'])
321
            if PERIOD_FAURE_NOUS.get(rr['rythme']):
322
                rr['recurrence_periodicity'] = PERIOD_FAURE_NOUS[rr['rythme']]
323
            else:
324
                add_invalid(rr, 'rythme not found %s' % rr['rythme'])
325
            # connect enfant
326
            enfant_id = rr['enfant_id']
327
            if enfant_id == '0':
328
                add_invalid(rr, 'not an appointment')
329
            elif enfant_id in enfant_idx:
330
                rr['enfant'] = enfant_idx[enfant_id]
331
            else:
332
                add_invalid(rr, 'enfant_id not found %s' % enfant_id)
333
                enfant_not_found.add(enfant_id)
334
            # connect act_type
335
            act_type_id = rr['type_acte']
336
            if act_type_id == '0' and rr['enfant_id'] == '0':
337
                add_invalid(rr, 'not an appointment')
338
            elif act_type_id:
339
                if act_type_id in act_types_idx:
340
                    rr['act_type'] = act_types_idx[act_type_id]
341
                else:
342
                    add_invalid(rr, 'act_type not found %s' % act_type_id)
343
                    unknown_act_type_id.add(act_type_id)
344
            else:
345
                raise NotImplemented
346
            if rr.get('invalid'):
347
                invalid_rr_writer.writerow([ unicode(rr[col]).encode('utf-8') for col in rr_cols ])
348

    
349
        # stats
350
        exceptions = 0
351
        events = 0
352
        single = 0
353
        recurrent = 0
354
        invalid_single = 0
355
        invalid_recurrent = 0
356
        for row in rs_data:
357
            if row['event']:
358
                events += 1
359
            elif row.get('invalid'):
360
                invalid_single += 1
361
            elif row['exception']:
362
                exceptions += 1
363
            else:
364
                single += 1
365
        for row in rr_data:
366
            if row.get('invalid'):
367
                invalid_recurrent += 1
368
            else:
369
                recurrent += 1
370
        print ' == Stat == '
371
        print ' Évènements hors RDV', events
372
        print ' Rdv individuels', single
373
        print ' Rdv recurrents', recurrent
374
        print ' Exceptions', exceptions
375
        print ' Rdv recurrents invalides', invalid_recurrent
376
        print ' Rdv individuels invalides', invalid_single
377

    
378
        # create single rdv
379
        limit = 1000000
380
        # single RS
381
        i = 0 
382
        rows = []
383
        events = []
384
        for row in rs_data[:limit]:
385
            if row['exception'] or row.get('invalid'):
386
                continue
387
            i += 1
388
            rows.append(row)
389
            event = EventWithAct.objects.create(patient=row['enfant'],
390
                    start_datetime=row['start_datetime'],
391
                    end_datetime=row['end_datetime'],
392
                    act_type=row['act_type'],
393
                    old_rs_id=row['id'],
394
                    title=row['enfant'].display_name,
395
                    description=row['texte'])
396
            row['event'] = event
397
            events.append(event)
398
            print "Rdv creation %-6d\r" % i,
399
        print
400
        def batch_bulk(model, rows, limit):
401
            i = 0
402
            while rows[i:i+limit]:
403
                model.objects.bulk_create(rows[i:i+limit])
404
                i += limit
405
        def service_and_workers(events, rows):
406
            services = []
407
            ServiceThrough = EventWithAct.services.through
408
            for event in events:
409
                services.append(ServiceThrough(
410
                    event_id=event.event_ptr_id,
411
                    service_id=service.id))
412
            batch_bulk(ServiceThrough, services, 100)
413
            ParticipantThrough = EventWithAct.participants.through
414
            participants = []
415
            for row, event in zip(rows, events):
416
                for worker in row['workers']:
417
                    participants.append(
418
                            ParticipantThrough(
419
                                event_id=event.event_ptr_id,
420
                                people_id=worker.people_ptr_id))
421
            batch_bulk(ParticipantThrough, participants, 100)
422
            print 'Created %s service links' % len(services)
423
            print 'Created %s participants links' % len(participants)
424
        service_and_workers(events, rows)
425
        # RR
426
        rows = []
427
        events = []
428
        i = 0
429
        for row in rr_data[:limit]:
430
            if row.get('invalid'):
431
                continue
432
            i += 1
433
            rows.append(row)
434
            event = EventWithAct.objects.create(
435
                    patient=row['enfant'],
436
                    start_datetime=row['start_datetime'],
437
                    end_datetime=row['end_datetime'],
438
                    act_type=row['act_type'],
439
                    old_rr_id=row['id'],
440
                    title=row['enfant'].display_name,
441
                    description=row['texte'],
442
                    recurrence_periodicity=row['recurrence_periodicity'],
443
                    recurrence_end_date=row['end_date'])
444
            row['event'] = event
445
            events.append(event)
446
            print "Rdv recurrent creation %-6d\r" % i,
447
        print
448
        service_and_workers(events, rows)
449
        # Exceptions
450
        excrows = []
451
        excevents = []
452
        i = 0
453
        for rr, event in zip(rows, events):
454
            for row in rr['exceptions']:
455
                if row.get('invalid'):
456
                    print 'exception invalide'
457
                    continue
458
                i += 1
459
                excrows.append(row)
460
                excevent = EventWithAct.objects.create(
461
                        patient=row['enfant'],
462
                        start_datetime=row['start_datetime'],
463
                        end_datetime=row['end_datetime'],
464
                        act_type=row['act_type'],
465
                        old_rs_id=row['id'],
466
                        title=row['enfant'].display_name,
467
                        description=row['texte'],
468
                        exception_to=event,
469
                        exception_date=row['date'])
470
                row['event'] = excevent
471
                excevents.append(excevent)
472
                print "Exception creation %-6d\r" % i,
473
        print
474
        service_and_workers(excevents, excrows)
475

    
476

    
477
        # Clean act for this service
478
        qs = Act.objects.filter(patient__service=service)
479
        print "Actes before delete", qs.count()
480
        qs.delete()
481
        print "Actes afterdelete", qs.count()
482
        limit = 20000
483
        for offset in range(0, 99999999, limit):
484
            actes_data, actes_idx, actes_cols = load_csv(db, 'actes', offset=offset, limit=limit)
485
            print 'Loading', len(actes_data), 'acts'
486
            if not actes_data:
487
                break
488
            actes_cols.extend(['workers','invalid'])
489
            invalid_actes_writer.writerow(map(lambda x: x.encode('utf-8'), actes_cols))
490
            actes_details_data, _, _ = load_csv(db, 'details_actes', offset=offset, limit=limit, id_column=1)
491
            handle_details(actes_data, actes_idx, actes_details_data, 'acte_id')
492
            act_to_event = dict()
493
            for row in rs_data:
494
                if row.get('event') and row['base_id']:
495
                    act_to_event[row['base_id']] = row['event']
496
            rows = []
497
            actes = []
498
            validation_state = []
499
            doctors = []
500
            i = 0
501
            j = 0
502
            k = 0
503
            DoctorThrough = Act.doctors.through
504
            for row in actes_data:
505
                row.setdefault('workers', set())
506
                row['date'] = _to_date(row['date_acte'])
507
                row['time'] = _to_time(row['heure'])
508
                row['duration'] = _to_duration(row['duree'])
509
                row['is_billed'] = row['marque'] == '1'
510
                row['validation_locked'] = row['date'] < date(2013, 1, 3)
511
                set_enfant(row)
512
                set_act_type(row)
513
                row['parent_event'] = act_to_event.get(row['id'])
514
                row['state'] = map_cs[service.name].get(row['cs'],
515
                        'VALIDE')
516
                duration = row['duration']
517
                if duration:
518
                    duration = duration.seconds // 60
519
                if row.get('invalid'):
520
                    invalid_actes_writer.writerow([ unicode(row[col]).encode('utf-8') for col in actes_cols ])
521
                    continue
522
                i += 1
523
                if row['parent_event']:
524
                    j += 1
525
                else:
526
                    t = row['time']
527
                    if t:
528
                        query = 'EXTRACT(hour from start_datetime) = %i and EXTRACT(minute from start_datetime) = %i' % (t.hour, t.minute)
529
                        qs = EventWithAct.objects.for_today(row['date']).filter(patient=row['enfant']).extra(where=[query])
530
                        if qs:
531
                            try:
532
                                row['parent_event'] = qs.get()
533
                            except:
534
                                print qs
535
                                import pdb
536
                                pdb.set_trace()
537

    
538
                            k += 1
539
                act = Act.objects.create(
540
                        date=row['date'],
541
                        time=row['time'],
542
                        _duration=duration,
543
                        is_billed=row['is_billed'],
544
                        act_type=row['act_type'],
545
                        patient=row['enfant'],
546
                        validation_locked=row['validation_locked'],
547
                        parent_event=row['parent_event'])
548
                rows.append(row)
549
                actes.append(act)
550
                validation_state.append(
551
                        ActValidationState(act=act,
552
                            state_name=row['state'],
553
                            previous_state=None))
554
                for worker in row['workers']:
555
                    doctors.append(DoctorThrough(
556
                        act_id=act.id,
557
                        worker_id=worker.id))
558
            batch_bulk(ActValidationState, validation_state, 500)
559
            batch_bulk(DoctorThrough, doctors, 500)
560

    
561
            print 'Actes:'
562
            print ' - importe ', i
563
            print ' - natual link to rdv', j
564
            print ' - complicated link to rdv', k
565

    
566

    
567
    invalid_rs_csv.close()
568
    invalid_rr_csv.close()
569

    
570
if __name__ == "__main__":
571
    with transaction.commit_manually():
572
        try:
573
            main()
574
        except:
575
            transaction.rollback()
576
            raise
577
        else:
578
            transaction.commit()
(19-19/28)