import io
import json
import logging

import requests

from trytond.model import ModelSQL, ModelView, fields, Workflow
from trytond.pyson import Eval
from trytond.wizard import Button

logger = logging.getLogger(__name__)

# Base URL of the automation service that exposes the /ocr, /structure,
# /tables and /metadata endpoints used below.
AUTOMATION_SERVICE_URL = "http://automation-service:8006"


def _as_text(value):
    """Return *value* in a form suitable for a Text field.

    Strings and ``None`` are returned unchanged; any other decoded JSON
    payload (dict, list, number, ...) is serialised with the same
    ``json.dumps`` options that ``run_metadata`` uses, so every ``*_json``
    field stores real JSON text.
    """
    if value is None or isinstance(value, str):
        return value
    return json.dumps(value, indent=4, ensure_ascii=False)


class AutomationDocument(ModelSQL, ModelView, Workflow):
    """Automation Document"""
    # The docstring above is intentionally left untouched.
    # NOTE(review): Tryton may use it as the model description - confirm
    # before rewording it.
    __name__ = 'automation.document'

    # Incoming document whose binary content is sent to the service.
    document = fields.Many2One('document.incoming', 'Document')
    type = fields.Selection([
        ('invoice', 'Invoice'),
        ('statement_of_facts', 'Statement of Facts'),
        ('weight_report', 'Weight Report'),
        ('bol', 'Bill of Lading'),
        ('controller_invoice', 'Controller Invoice'),
        ], 'Type')
    # Progress marker written by the run_* buttons.
    state = fields.Selection([
        ('draft', 'Draft'),
        ('ocr_done', 'OCR Done'),
        ('structure_done', 'Structure Done'),
        ('table_done', 'Table Done'),
        ('metadata_done', 'Metadata Done'),
        ('validated', 'Validated'),
        ('error', 'Error'),
        ], 'State', required=True)
    # Raw results returned by each service endpoint.
    ocr_text = fields.Text('OCR Text')
    structure_json = fields.Text('Structure JSON')
    tables_json = fields.Text('Tables JSON')
    metadata_json = fields.Text('Metadata JSON')
    # Append-only processing log shown to the user.
    notes = fields.Text('Notes')
    # Rule set whose *_required flags select the pipeline steps.
    rule_set = fields.Many2One('automation.rule.set', 'Rule Set')

    @classmethod
    def __setup__(cls):
        """Register the processing buttons on the model."""
        super().__setup__()
        # NOTE(review): 'test' is not one of the declared states, so this
        # condition never hides the buttons - confirm the intended state.
        cls._buttons.update({
            name: {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                }
            for name in (
                'run_pipeline',
                'run_ocr',
                'run_structure',
                'run_tables',
                'run_metadata',
                )
            })

    @staticmethod
    def default_state():
        """Return the initial state so the required field is always set."""
        return 'draft'

    @staticmethod
    def _upload_document(doc, endpoint):
        """POST the binary content of *doc* to a service endpoint.

        :param doc: record whose ``document`` provides ``data`` and ``name``
        :param endpoint: endpoint name without leading slash, e.g. ``"ocr"``
        :return: the decoded JSON body of the response
        :raises Exception: on a missing document, a transport/HTTP error or
            an undecodable body; callers record it on the document.
        """
        file_data = doc.document.data or b""
        logger.info("File size: %s bytes", len(file_data))
        logger.info("First 20 bytes: %s", file_data[:20])
        logger.info("Last 20 bytes: %s", file_data[-20:])
        file_name = doc.document.name or "document"

        # NOTE(review): no timeout is passed, so an unresponsive service
        # blocks this call indefinitely - consider adding one.
        response = requests.post(
            f"{AUTOMATION_SERVICE_URL}/{endpoint}",
            files={"file": (file_name, io.BytesIO(file_data))},
            )
        response.raise_for_status()
        return response.json()

    # -------------------------------------------------------
    # OCR
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_ocr(cls, docs):
        """Send each document to the OCR endpoint and store its text.

        A failure is recorded on the document (state ``error`` plus a line
        in ``notes``) instead of being raised; each document is saved.
        """
        for doc in docs:
            try:
                data = cls._upload_document(doc, "ocr")
                logger.info("RUN_OCR_RESPONSE:%s", data)
                doc.ocr_text = data.get("ocr_text", "")
                doc.state = "ocr_done"
                doc.notes = (doc.notes or "") + "OCR done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"OCR error: {e}\n"
                logger.exception("OCR error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # STRUCTURE (doctr)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_structure(cls, docs):
        """Send each document to the structure endpoint and store the result.

        A failure is recorded on the document (state ``error`` plus a line
        in ``notes``) instead of being raised; each document is saved.
        """
        for doc in docs:
            try:
                data = cls._upload_document(doc, "structure")
                # Non-string payloads are JSON-encoded, as for metadata.
                doc.structure_json = _as_text(data.get("structure", ""))
                doc.state = "structure_done"
                doc.notes = (doc.notes or "") + "Structure parsing done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Structure error: {e}\n"
                logger.exception("Structure error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # TABLES (camelot)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_tables(cls, docs):
        """Send each document to the tables endpoint and store the result.

        A failure is recorded on the document (state ``error`` plus a line
        in ``notes``) instead of being raised; each document is saved.
        """
        for doc in docs:
            try:
                data = cls._upload_document(doc, "tables")
                # Non-string payloads are JSON-encoded, as for metadata.
                doc.tables_json = _as_text(data.get("tables", ""))
                doc.state = "table_done"
                doc.notes = (doc.notes or "") + "Table extraction done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Table error: {e}\n"
                logger.exception("Table error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # METADATA (spaCy)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_metadata(cls, docs):
        """Send each document's OCR text to the metadata endpoint.

        The full JSON answer is stored, pretty-printed, in
        ``metadata_json``. A failure is recorded on the document (state
        ``error`` plus a line in ``notes``) instead of being raised; each
        document is saved.
        """
        for doc in docs:
            try:
                logger.info(
                    "Sending OCR text to metadata API: %s", doc.ocr_text)
                response = requests.post(
                    f"{AUTOMATION_SERVICE_URL}/metadata",
                    json={"text": doc.ocr_text or ""},
                    )
                response.raise_for_status()
                data = response.json()

                # Store the complete JSON returned by the API.
                doc.metadata_json = json.dumps(
                    data, indent=4, ensure_ascii=False)
                doc.state = "metadata_done"
                doc.notes = (
                    (doc.notes or "") + "Metadata extraction done\n")
            except requests.exceptions.RequestException as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Metadata HTTP error: {e}\n"
                logger.exception("Metadata HTTP error: %s", e)
            except Exception as e:
                doc.state = "error"
                doc.notes = (
                    (doc.notes or "") + f"Metadata processing error: {e}\n")
                logger.exception("Metadata processing error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # FULL PIPELINE
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_pipeline(cls, docs):
        """Run the steps required by each document's rule set, in order.

        Steps run in the order OCR, structure, tables, metadata and stop at
        the first one that leaves the document in state ``error``. When
        every required step succeeds the document becomes ``validated``.
        Each document is saved.
        """
        steps = (
            ("ocr_required", cls.run_ocr),
            ("structure_required", cls.run_structure),
            ("table_required", cls.run_tables),
            ("metadata_required", cls.run_metadata),
            )
        for doc in docs:
            try:
                # The flags live on the record's rule set; the class
                # attribute is only the field definition.
                rule_set = doc.rule_set
                if rule_set is None:
                    raise ValueError("no rule set configured")

                failed = False
                for flag, step in steps:
                    if not getattr(rule_set, flag):
                        continue
                    step([doc])
                    # Only a step run in this pass counts as a failure, so
                    # an 'error' state left by an earlier run does not
                    # block a new attempt.
                    if doc.state == "error":
                        failed = True
                        break

                if not failed:
                    doc.state = "validated"
                    doc.notes = (doc.notes or "") + "Pipeline completed\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Pipeline error: {e}\n"
                logger.exception("Pipeline error: %s", e)
            doc.save()