Project

General

Profile

Download (21.8 KB) Statistics
| Branch: | Tag: | Revision:

calebasse / scripts / import_rs.py @ 1f0de62b

1
# -*- coding: utf-8 -*-
2
#!/usr/bin/env python
3

    
4
import os
5
import csv
6

    
7
from datetime import datetime, date
8

    
9
import calebasse.settings
10
import django.core.management
11

    
12
django.core.management.setup_environ(calebasse.settings)
13

    
14
from django.db import transaction
15
from calebasse.agenda.models import EventWithAct
16
from calebasse.dossiers.models import PatientRecord
17
from calebasse.personnes.models import Worker
18
from calebasse.ressources.models import Service, Room
19
from calebasse.ressources.models import ActType
20
from calebasse.actes.models import Act, ActValidationState
21

    
22
from import_dossiers import map_cs
23

    
24
# Configuration
25
db_path = "./scripts/20121221-192258"
26

    
27
dbs = ["F_ST_ETIENNE_SESSAD_TED", "F_ST_ETIENNE_CMPP", "F_ST_ETIENNE_CAMSP", "F_ST_ETIENNE_SESSAD"]
28
# dbs = ["F_ST_ETIENNE_SESSAD_TED"]
29

    
30
def _to_datetime(str_date):
31
    if not str_date:
32
        return None
33
    return datetime.strptime(str_date[:19], "%Y-%m-%d %H:%M:%S")
34

    
35
def _to_date(str_date):
36
    dt = _to_datetime(str_date)
37
    return dt and dt.date()
38

    
39
def _to_time(str_date):
40
    dt = _to_datetime(str_date)
41
    return dt and dt.time()
42

    
43
def _to_duration(str_date):
44
    dt = _to_datetime(str_date)
45
    if dt is None:
46
        return None
47
    return dt - datetime(1900, 01, 01, 0, 0)
48

    
49
def _to_int(str_int):
50
    if not str_int:
51
        return None
52
    return int(str_int)
53

    
54
def _get_dict(cols, line):
55
    """"""
56
    res = {}
57
    for i, data in enumerate(line):
58
        res[cols[i]] = data.decode('utf-8')
59
    return res
60

    
61
def batch_delete(qs, limit):
62
    count = qs.count()
63
    i = 0
64
    while i < count:
65
        ids = qs[i:i+limit].values_list('pk', flat=True)
66
        qs.filter(pk__in=ids).delete()
67
        i += limit
68

    
69

    
70
PERIOD_FAURE_NOUS = {1 : 1,
71
2 : 2,
72
3 : 3,
73
4 : 4,
74
5: 6,
75
6: 7,
76
7: 8,
77
8: 9,
78
9: None,
79
10: 10,
80
12: 11,
81
13: 12,
82
}
83

    
84
JOURS = {1: 'lundi',
85
2: 'mardi',
86
3: 'mercredi',
87
4: 'jeudi',
88
5: 'vendredi'
89
}
90

    
91
dic_worker = {}
92

    
93
def load_csv(db, name, offset=0, limit=9999999, id_column=0):
94
    records = []
95
    idx = {}
96

    
97
    csvfile = open(os.path.join(db_path, db, name + '.csv'), 'rb')
98
    csvlines = csv.reader(csvfile, delimiter=';', quotechar='|')
99
    cols = csvlines.next()
100
    i = 0
101
    for line in csvlines:
102
        if not (offset <= int(line[id_column]) < offset+limit):
103
            continue
104
        data = _get_dict(cols, line)
105
        records.append(data)
106
        idx[data['id']] = i
107
        i += 1
108
    csvfile.close()
109
    return records, idx, cols
110

    
111
def add_invalid(d, reason):
112
    d.setdefault('invalid', '')
113
    if d['invalid']:
114
        d['invalid'] += ', '
115
    d['invalid'] += reason
116

    
117
def main():
118
    """ """
119

    
120
    workers = Worker.objects.all()
121
    invalid_rs_csv = open('./scripts/invalid_rs.csv', 'wb+')
122
    invalid_rs_writer = csv.writer(invalid_rs_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
123
    invalid_rr_csv = open('./scripts/invalid_rr.csv', 'wb+')
124
    invalid_rr_writer = csv.writer(invalid_rr_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
125
    invalid_actes_csv = open('./scripts/invalid_actes.csv', 'wb+')
126
    invalid_actes_writer = csv.writer(invalid_actes_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
127
    for db in dbs:
128
        workers_idx = {}
129
        act_types_idx = {}
130
        if "F_ST_ETIENNE_CMPP" == db:
131
            service = Service.objects.get(name="CMPP")
132
        elif "F_ST_ETIENNE_CAMSP" == db:
133
            service = Service.objects.get(name="CAMSP")
134
        elif "F_ST_ETIENNE_SESSAD_TED" == db:
135
            service = Service.objects.get(name="SESSAD TED")
136
        elif "F_ST_ETIENNE_SESSAD" == db:
137
            service = Service.objects.get(name="SESSAD DYS")
138

    
139
        EventWithAct.objects.filter(services=service).delete()
140
        print '===', service.name, '==='
141
        print datetime.now()
142

    
143
        rs_data, rs_idx, rs_cols = load_csv(db, 'rs')
144
        rr_data, rr_idx, rr_cols = load_csv(db, 'rr')
145
        rs_cols.extend(['workers', 'invalid'])
146
        rr_cols.extend(['workers', 'invalid'])
147
        invalid_rs_writer.writerow(map(lambda x: x.encode('utf-8'), rs_cols))
148
        invalid_rr_writer.writerow(map(lambda x: x.encode('utf-8'), rr_cols))
149

    
150
        print "%s - Nombre de rdv : %d" % (service.name, len(rs_data))
151
        print u"%s - Nombre de rdv récurrents : %d" % (service.name, len(rr_data))
152

    
153
        rs_details_data, _, _ = load_csv(db, 'details_rs')
154
        rr_details_data, _, _ = load_csv(db, 'details_rr')
155
        for i, worker in enumerate(workers):
156
            if service.name == 'CMPP':
157
                j = worker.old_cmpp_id
158
            elif service.name == 'CAMSP':
159
                j = worker.old_camsp_id
160
            elif service.name == 'SESSAD DYS':
161
                j = worker.old_sessad_dys_id
162
            elif service.name == 'SESSAD TED':
163
                j = worker.old_sessad_ted_id
164
            else:
165
                print "service inconnu!!!"
166
                exit(0)
167
            if j:
168
                workers_idx[j] = worker
169

    
170
        act_types = ActType.objects.for_service(service)
171
        act_type_id_not_found = set()
172
        for i, act_type in enumerate(act_types):
173
            j = act_type.old_id
174
            if j:
175
                act_types_idx[j] = act_type
176
            else:
177
                act_type_id_not_found.add(act_type)
178

    
179
        def set_act_type(row, not_found=None):
180
            act_type_id = row['type_acte']
181
            if act_type_id == '0':
182
                add_invalid(row, 'no act_id=>not importable')
183
            elif act_type_id in act_types_idx:
184
                row['act_type'] = act_types_idx[act_type_id]
185
            else:
186
                add_invalid(row, 'act_type not found %s' % act_type_id)
187
                if not_found:
188
                    not_found.add(act_type_id)
189

    
190
        def handle_details(data, idx, details, id_key):
191
            not_found = set()
192
            id_not_found = set()
193
            for detail in details:
194
                i = detail[id_key]
195
                thera_id = detail['thera_id']
196
                if i not in idx:
197
                    id_not_found.add(i)
198
                    continue
199
                row = data[idx[i]]
200
                if thera_id in workers_idx:
201
                    workers = row.setdefault('workers', set())
202
                    workers.add(workers_idx[thera_id])
203
                else:
204
                    add_invalid(row, 'unknown thera_id %s' % thera_id)
205
                    not_found.add(thera_id)
206

    
207
            print "%s - Liste des worker not found : %s" % (service.name, str(set(not_found)))
208
            print "%s - Liste des details pour des RS/RR not found : %s" % (service.name, str(set(id_not_found)))
209

    
210
        print ' - Traitement rs_detail....'
211
        handle_details(rs_data, rs_idx, rs_details_data, 'rs_id')
212
        print ' - Traitement rr_detail....'
213
        handle_details(rr_data, rr_idx, rr_details_data, 'rr_id')
214
        print "%s - Nombre de types d'actes : %d" % (service.name, len(act_types))
215
        print "%s - Liste des types d'actes sans id : %s" % (service.name, str(act_type_id_not_found))
216

    
217
        enfant_idx = {}
218
        for enfant in PatientRecord.objects.filter(service=service):
219
            enfant_idx[enfant.old_id] = enfant
220

    
221
        def set_enfant(row, not_found=None):
222
            # connect enfant
223
            enfant_id = row['enfant_id']
224
            if enfant_id == '0':
225
                add_invalid(row, 'no enfant_id=>not an appointment')
226
                row['event'] = True
227
            elif enfant_id in enfant_idx:
228
                row['enfant'] = enfant_idx[enfant_id]
229
            else:
230
                add_invalid(row, 'enfant_id not found %s' % enfant_id)
231
                if not_found:
232
                    not_found.add(enfant_id)
233

    
234
        enfant_not_found = set()
235
        rr_not_found = set()
236
        rs_without_act_type = set()
237
        unknown_act_type_id = set()
238
        invalid_rs = set()
239
        seen_exceptions = dict()
240
        for rs in rs_data:
241
            rs.setdefault('workers', set())
242
            rs['exception'] = False
243
            rs['event'] = False
244
            rs['date'] = _to_date(rs['date_rdv'])
245
            # connect enfant
246
            enfant_id = rs['enfant_id']
247
            if enfant_id == '0':
248
                add_invalid(rs, 'no enfant_id=>not an appointment')
249
                rs['event'] = True
250
            elif enfant_id in enfant_idx:
251
                rs['enfant'] = enfant_idx[enfant_id]
252
            else:
253
                add_invalid(rs, 'enfant_id not found %s' % enfant_id)
254
                enfant_not_found.add(enfant_id)
255
            # connect rr
256
            rr_id = rs['rr_ev_id']
257
            if rr_id:
258
                if rr_id.startswith('ev_'):
259
                    rs['ok'] = False
260
                elif rr_id.startswith('rr_'):
261
                    _, rr_id = rr_id.split('_')
262
                    if rr_id in rr_idx:
263
                        if (rr_id, rs['date']) not in seen_exceptions:
264
                            seen_exceptions[(rr_id, rs['date'])] = rs['id']
265
                            exceptions = rr_data[rr_idx[rr_id]].setdefault('exceptions', [])
266
                            exceptions.append(rs)
267
                            rs['exception'] = True
268
                        else:
269
                            add_invalid(rs, 'already another exception on the same day: %s' %
270
                                    seen_exceptions[(rr_id, rs['date'])])
271
                    else:
272
                        add_invalid(rs, 'rr_id not found %s' % rr_id)
273
                        rr_not_found.add(rr_id)
274
                else:
275
                    add_invalid(rs, 'rr_id invalid %s' % rr_id)
276
            rs['time'] = _to_time(rs['heure'])
277
            rs['duration'] = _to_duration(rs['duree'])
278
            rs['start_datetime'] = datetime.combine(rs['date'], rs['time'])
279
            rs['end_datetime'] = rs['start_datetime'] + rs['duration']
280
            act_type_id = rs['type_acte']
281
            if act_type_id == '0' and rs['enfant_id'] == '0':
282
                add_invalid(rs, 'no act_id=>not an appointment')
283
                rs['event'] = True
284
            elif act_type_id:
285
                if act_type_id in act_types_idx:
286
                    rs['act_type'] = act_types_idx[act_type_id]
287
                else:
288
                    add_invalid(rs, 'act_type not found %s' % act_type_id)
289
                    unknown_act_type_id.add(act_type_id)
290
            else:
291
                raise NotImplemented
292
            if rs.get('invalid') and not rs['event']:
293
                invalid_rs.add(rs['id'])
294
                invalid_rs_writer.writerow([ unicode(rs[col]).encode('utf-8') for col in rs_cols ])
295

    
296

    
297
        print "%s - Liste des enfants not found : %s" % (service.name, str(enfant_not_found))
298
        print "%s - Liste des RR not found : %s" % (service.name, str(rr_not_found))
299
        print "%s - Liste des RS sans type d'acte : %s" % (service.name, str(rs_without_act_type))
300
        print "%s - Liste des types d'actes inconnus : %s" % (service.name, str(unknown_act_type_id))
301
        print "%s - Liste des RS invalides : %s" % (service.name, len(invalid_rs))
302

    
303
        enfant_not_found = set()
304
        rr_not_found = set()
305
        rs_without_act_type = set()
306
        unknown_act_type_id = set()
307
        invalid_rs = set()
308
        for rr in rr_data:
309
            rs.setdefault('workers', set())
310
            rr.setdefault('exceptions', [])
311
            rr['start_date'] = _to_date(rr['date_debut'])
312
            rr['end_date'] = _to_date(rr['date_fin'])
313
            if rr['end_date'] and rr['start_date'] > rr['end_date']:
314
                add_invalid(rr, 'date_fin < date_debut')
315
            rr['time'] = _to_time(rr['heure'])
316
            rr['duration'] = _to_duration(rr['duree'])
317
            rr['start_datetime'] = datetime.combine(rr['start_date'], rr['time'])
318
            rr['end_datetime'] = rr['start_datetime'] + rr['duration']
319
            # connect rythme
320
            rr['rythme'] = int(rr['rythme'])
321
            if PERIOD_FAURE_NOUS.get(rr['rythme']):
322
                rr['recurrence_periodicity'] = PERIOD_FAURE_NOUS[rr['rythme']]
323
            else:
324
                add_invalid(rr, 'rythme not found %s' % rr['rythme'])
325
            # connect enfant
326
            enfant_id = rr['enfant_id']
327
            if enfant_id == '0':
328
                add_invalid(rr, 'not an appointment')
329
            elif enfant_id in enfant_idx:
330
                rr['enfant'] = enfant_idx[enfant_id]
331
            else:
332
                add_invalid(rr, 'enfant_id not found %s' % enfant_id)
333
                enfant_not_found.add(enfant_id)
334
            # connect act_type
335
            act_type_id = rr['type_acte']
336
            if act_type_id == '0' and rr['enfant_id'] == '0':
337
                add_invalid(rr, 'not an appointment')
338
            elif act_type_id:
339
                if act_type_id in act_types_idx:
340
                    rr['act_type'] = act_types_idx[act_type_id]
341
                else:
342
                    add_invalid(rr, 'act_type not found %s' % act_type_id)
343
                    unknown_act_type_id.add(act_type_id)
344
            else:
345
                raise NotImplemented
346
            if rr.get('invalid'):
347
                invalid_rr_writer.writerow([ unicode(rr[col]).encode('utf-8') for col in rr_cols ])
348

    
349
        # stats
350
        exceptions = 0
351
        events = 0
352
        single = 0
353
        recurrent = 0
354
        invalid_single = 0
355
        invalid_recurrent = 0
356
        for row in rs_data:
357
            if row['event']:
358
                events += 1
359
            elif row.get('invalid'):
360
                invalid_single += 1
361
            elif row['exception']:
362
                exceptions += 1
363
            else:
364
                single += 1
365
        for row in rr_data:
366
            if row.get('invalid'):
367
                invalid_recurrent += 1
368
            else:
369
                recurrent += 1
370
        print ' == Stat == '
371
        print ' Évènements hors RDV', events
372
        print ' Rdv individuels', single
373
        print ' Rdv recurrents', recurrent
374
        print ' Exceptions', exceptions
375
        print ' Rdv recurrents invalides', invalid_recurrent
376
        print ' Rdv individuels invalides', invalid_single
377

    
378
        # create single rdv
379
        limit = 1000000
380
        # single RS
381
        i = 0 
382
        rows = []
383
        events = []
384
        for row in rs_data[:limit]:
385
            if row['exception'] or row.get('invalid'):
386
                continue
387
            i += 1
388
            rows.append(row)
389
            event = EventWithAct.objects.create(patient=row['enfant'],
390
                    start_datetime=row['start_datetime'],
391
                    end_datetime=row['end_datetime'],
392
                    act_type=row['act_type'],
393
                    old_rs_id=row['id'],
394
                    room=Room(id=1),
395
                    title=row['libelle'],
396
                    description=row['texte'])
397
            row['event'] = event
398
            events.append(event)
399
            print "Rdv creation %-6d\r" % i,
400
        print
401
        def batch_bulk(model, rows, limit):
402
            i = 0
403
            while rows[i:i+limit]:
404
                model.objects.bulk_create(rows[i:i+limit])
405
                i += limit
406
        def service_and_workers(events, rows):
407
            services = []
408
            ServiceThrough = EventWithAct.services.through
409
            for event in events:
410
                services.append(ServiceThrough(
411
                    event_id=event.event_ptr_id,
412
                    service_id=service.id))
413
            batch_bulk(ServiceThrough, services, 100)
414
            ParticipantThrough = EventWithAct.participants.through
415
            participants = []
416
            for row, event in zip(rows, events):
417
                for worker in row['workers']:
418
                    participants.append(
419
                            ParticipantThrough(
420
                                event_id=event.event_ptr_id,
421
                                people_id=worker.people_ptr_id))
422
            batch_bulk(ParticipantThrough, participants, 100)
423
            print 'Created %s service links' % len(services)
424
            print 'Created %s participants links' % len(participants)
425
        service_and_workers(events, rows)
426
        # RR
427
        rows = []
428
        events = []
429
        i = 0
430
        for row in rr_data[:limit]:
431
            if row.get('invalid'):
432
                continue
433
            i += 1
434
            rows.append(row)
435
            event = EventWithAct.objects.create(
436
                    patient=row['enfant'],
437
                    start_datetime=row['start_datetime'],
438
                    end_datetime=row['end_datetime'],
439
                    act_type=row['act_type'],
440
                    old_rr_id=row['id'],
441
                    room=Room(id=1),
442
                    title=row['libelle'],
443
                    description=row['texte'],
444
                    recurrence_periodicity=row['recurrence_periodicity'],
445
                    recurrence_end_date=row['end_date'])
446
            row['event'] = event
447
            events.append(event)
448
            print "Rdv recurrent creation %-6d\r" % i,
449
        print
450
        service_and_workers(events, rows)
451
        # Exceptions
452
        excrows = []
453
        excevents = []
454
        i = 0
455
        for rr, event in zip(rows, events):
456
            for row in rr['exceptions']:
457
                if row.get('invalid'):
458
                    print 'exception invalide'
459
                    continue
460
                i += 1
461
                excrows.append(row)
462
                excevent = EventWithAct.objects.create(
463
                        patient=row['enfant'],
464
                        start_datetime=row['start_datetime'],
465
                        end_datetime=row['end_datetime'],
466
                        act_type=row['act_type'],
467
                        old_rs_id=row['id'],
468
                        room=Room(id=1),
469
                        title=row['libelle'],
470
                        description=row['texte'],
471
                        exception_to=event,
472
                        exception_date=row['date'])
473
                row['event'] = excevent
474
                excevents.append(excevent)
475
                print "Exception creation %-6d\r" % i,
476
        print
477
        service_and_workers(excevents, excrows)
478

    
479

    
480
        # Clean act for this service
481
        qs = Act.objects.filter(patient__service=service)
482
        print "Actes before delete", qs.count()
483
        qs.delete()
484
        print "Actes afterdelete", qs.count()
485
        limit = 20000
486
        for offset in range(0, 99999999, limit):
487
            actes_data, actes_idx, actes_cols = load_csv(db, 'actes', offset=offset, limit=limit)
488
            print 'Loading', len(actes_data), 'acts'
489
            if not actes_data:
490
                break
491
            actes_cols.extend(['workers','invalid'])
492
            invalid_actes_writer.writerow(map(lambda x: x.encode('utf-8'), actes_cols))
493
            actes_details_data, _, _ = load_csv(db, 'details_actes', offset=offset, limit=limit, id_column=1)
494
            handle_details(actes_data, actes_idx, actes_details_data, 'acte_id')
495
            act_to_event = dict()
496
            for row in rs_data:
497
                if row.get('event') and row['base_id']:
498
                    act_to_event[row['base_id']] = row['event']
499
            rows = []
500
            actes = []
501
            validation_state = []
502
            doctors = []
503
            i = 0
504
            j = 0
505
            k = 0
506
            DoctorThrough = Act.doctors.through
507
            for row in actes_data:
508
                row.setdefault('workers', set())
509
                row['date'] = _to_date(row['date_acte'])
510
                row['time'] = _to_time(row['heure'])
511
                row['duration'] = _to_duration(row['duree'])
512
                row['is_billed'] = row['marque'] == '1'
513
                row['validation_locked'] = row['date'] < date(2013, 1, 3)
514
                set_enfant(row)
515
                set_act_type(row)
516
                row['parent_event'] = act_to_event.get(row['id'])
517
                row['state'] = map_cs[service.name].get(row['cs'],
518
                        'VALIDE')
519
                duration = row['duration']
520
                if duration:
521
                    duration = duration.seconds // 60
522
                if row.get('invalid'):
523
                    invalid_actes_writer.writerow([ unicode(row[col]).encode('utf-8') for col in actes_cols ])
524
                    continue
525
                i += 1
526
                if row['parent_event']:
527
                    j += 1
528
                else:
529
                    t = row['time']
530
                    if t:
531
                        query = 'EXTRACT(hour from start_datetime) = %i and EXTRACT(minute from start_datetime) = %i' % (t.hour, t.minute)
532
                        qs = EventWithAct.objects.for_today(row['date']).filter(patient=row['enfant']).extra(where=[query])
533
                        if qs:
534
                            try:
535
                                row['parent_event'] = qs.get()
536
                            except:
537
                                print qs
538
                                import pdb
539
                                pdb.set_trace()
540

    
541
                            k += 1
542
                act = Act.objects.create(
543
                        date=row['date'],
544
                        time=row['time'],
545
                        _duration=duration,
546
                        is_billed=row['is_billed'],
547
                        act_type=row['act_type'],
548
                        patient=row['enfant'],
549
                        validation_locked=row['validation_locked'],
550
                        parent_event=row['parent_event'])
551
                rows.append(row)
552
                actes.append(act)
553
                validation_state.append(
554
                        ActValidationState(act=act,
555
                            state_name=row['state'],
556
                            previous_state=None))
557
                for worker in row['workers']:
558
                    doctors.append(DoctorThrough(
559
                        act_id=act.id,
560
                        worker_id=worker.id))
561
            batch_bulk(ActValidationState, validation_state, 500)
562
            batch_bulk(DoctorThrough, doctors, 500)
563

    
564
            print 'Actes:'
565
            print ' - importe ', i
566
            print ' - natual link to rdv', j
567
            print ' - complicated link to rdv', k
568

    
569

    
570
    invalid_rs_csv.close()
571
    invalid_rr_csv.close()
572

    
573
if __name__ == "__main__":
574
    with transaction.commit_manually():
575
        try:
576
            main()
577
        except:
578
            transaction.rollback()
579
            raise
580
        else:
581
            transaction.commit()
(19-19/28)