Files
tradon/modules/automation/automation.py
2025-12-26 13:11:43 +00:00

201 lines
7.9 KiB
Python

from trytond.model import ModelSQL, ModelView, fields, Workflow
from trytond.pyson import Eval
from trytond.wizard import Button
import requests
import io
import logging
import json
logger = logging.getLogger(__name__)
class AutomationDocument(ModelSQL, ModelView, Workflow):
    """Automation Document

    Carries one incoming document through the extraction steps (OCR,
    structure, tables, metadata). Each step posts to the automation HTTP
    service, stores the result on the record and updates ``state``; a
    failing step sets ``state`` to ``error`` and appends the reason to
    ``notes``.
    """
    __name__ = 'automation.document'

    # Base URL of the extraction service.
    _service_url = "http://automation-service:8006"
    # (connect, read) timeouts in seconds, so an unresponsive service cannot
    # block the caller forever. The read timeout is deliberately generous.
    _service_timeout = (10, 300)

    document = fields.Many2One('document.incoming', 'Document')
    type = fields.Selection([
        ('invoice', 'Invoice'),
        ('statement_of_facts', 'Statement of Facts'),
        ('weight_report', 'Weight Report'),
        ('bol', 'Bill of Lading'),
        ('controller_invoice', 'Controller Invoice'),
        ], 'Type')
    state = fields.Selection([
        ('draft', 'Draft'),
        ('ocr_done', 'OCR Done'),
        ('structure_done', 'Structure Done'),
        ('table_done', 'Table Done'),
        ('metadata_done', 'Metadata Done'),
        ('validated', 'Validated'),
        ('error', 'Error'),
        ], 'State', required=True)
    ocr_text = fields.Text('OCR Text')
    structure_json = fields.Text('Structure JSON')
    tables_json = fields.Text('Tables JSON')
    metadata_json = fields.Text('Metadata JSON')
    notes = fields.Text('Notes')
    rule_set = fields.Many2One('automation.rule.set', 'Rule Set')

    @classmethod
    def __setup__(cls):
        """Register the step buttons and their visibility rule."""
        super().__setup__()
        # NOTE(review): 'test' is not one of the declared states, so this
        # condition never holds and the buttons are always visible --
        # confirm which state(s) should hide them.
        cls._buttons.update({
            name: {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                }
            for name in (
                'run_pipeline', 'run_ocr', 'run_structure',
                'run_tables', 'run_metadata')
            })

    @classmethod
    def default_state(cls):
        """Return ``'draft'`` so the required ``state`` field is always set."""
        return 'draft'

    # -------------------------------------------------------
    # HELPERS
    # -------------------------------------------------------
    @staticmethod
    def _append_note(doc, message):
        """Append *message* as a new line to ``doc.notes``."""
        doc.notes = (doc.notes or "") + message + "\n"

    @staticmethod
    def _mark_error(doc, label, exc):
        """Log *exc* and flag *doc* as failed.

        Must be called from inside an ``except`` block so that the
        traceback is attached to the log record.
        """
        logger.exception("%s error: %s", label, exc)
        doc.state = "error"
        doc.notes = (doc.notes or "") + f"{label} error: {exc}\n"

    @staticmethod
    def _to_text(value):
        """Return *value* in a form that can be stored in a Text field.

        Strings and ``None`` pass through unchanged; anything else (for
        example a list or dict decoded from the service response) is
        serialised to JSON text.
        """
        if value is None or isinstance(value, str):
            return value
        return json.dumps(value, indent=4, ensure_ascii=False)

    @classmethod
    def _post_file(cls, doc, endpoint):
        """Upload the file attached to *doc* and return the decoded JSON.

        :param doc: record whose ``document`` holds the file to send.
        :param endpoint: path segment of the service route, e.g. ``"ocr"``.
        :raises ValueError: if *doc* has no document attached.
        :raises requests.exceptions.RequestException: on connection
            failure, timeout or a non-2xx response.
        """
        if doc.document is None:
            raise ValueError("no document attached")
        # Read the file content from the document's binary field.
        file_data = doc.document.data or b""
        file_name = doc.document.name or "document"
        logger.info("File size: %s bytes", len(file_data))
        logger.info("First 20 bytes: %s", file_data[:20])
        logger.info("Last 20 bytes: %s", file_data[-20:])
        response = requests.post(
            f"{cls._service_url}/{endpoint}",
            files={"file": (file_name, io.BytesIO(file_data))},
            timeout=cls._service_timeout,
            )
        response.raise_for_status()
        return response.json()

    # -------------------------------------------------------
    # OCR
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_ocr(cls, docs):
        """Send each document to the OCR route and store the returned text.

        On success ``state`` becomes ``ocr_done``; on any failure it
        becomes ``error``. The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "ocr")
                logger.info("RUN_OCR_RESPONSE:%s", data)
                doc.ocr_text = cls._to_text(data.get("ocr_text", ""))
                doc.state = "ocr_done"
                cls._append_note(doc, "OCR done")
            except Exception as e:
                cls._mark_error(doc, "OCR", e)
            doc.save()

    # -------------------------------------------------------
    # STRUCTURE (doctr)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_structure(cls, docs):
        """Send each document to the structure route and store the result.

        On success ``state`` becomes ``structure_done``; on any failure it
        becomes ``error``. The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "structure")
                doc.structure_json = cls._to_text(data.get("structure", ""))
                doc.state = "structure_done"
                cls._append_note(doc, "Structure parsing done")
            except Exception as e:
                cls._mark_error(doc, "Structure", e)
            doc.save()

    # -------------------------------------------------------
    # TABLES (camelot)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_tables(cls, docs):
        """Send each document to the tables route and store the result.

        On success ``state`` becomes ``table_done``; on any failure it
        becomes ``error``. The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "tables")
                doc.tables_json = cls._to_text(data.get("tables", ""))
                doc.state = "table_done"
                cls._append_note(doc, "Table extraction done")
            except Exception as e:
                cls._mark_error(doc, "Table", e)
            doc.save()

    # -------------------------------------------------------
    # METADATA (spaCy)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_metadata(cls, docs):
        """Send each document's OCR text to the metadata route.

        The full response is stored as pretty-printed JSON text. On
        success ``state`` becomes ``metadata_done``; on any failure it
        becomes ``error``. The record is saved in both cases.
        """
        for doc in docs:
            try:
                logger.info(
                    "Sending OCR text to metadata API: %s", doc.ocr_text)
                response = requests.post(
                    f"{cls._service_url}/metadata",
                    json={"text": doc.ocr_text or ""},
                    timeout=cls._service_timeout,
                    )
                response.raise_for_status()
                data = response.json()
                # Store the complete JSON returned by the API as text.
                doc.metadata_json = json.dumps(
                    data, indent=4, ensure_ascii=False)
                doc.state = "metadata_done"
                cls._append_note(doc, "Metadata extraction done")
            except requests.exceptions.RequestException as e:
                cls._mark_error(doc, "Metadata HTTP", e)
            except Exception as e:
                cls._mark_error(doc, "Metadata processing", e)
            doc.save()

    # -------------------------------------------------------
    # FULL PIPELINE
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_pipeline(cls, docs):
        """Run the steps enabled on each document's rule set, in order.

        Steps run in the order OCR, structure, tables, metadata. After the
        OCR step, a later step is skipped once ``state`` is ``error``. If
        the document is not in ``error`` afterwards, ``state`` becomes
        ``validated``. The record is saved in every case.
        """
        for doc in docs:
            try:
                # The flags live on the record's rule set, not on the
                # class-level field definition.
                rules = doc.rule_set
                if rules is None:
                    raise ValueError("no rule set configured")
                if rules.ocr_required:
                    cls.run_ocr([doc])
                if rules.structure_required and doc.state != "error":
                    cls.run_structure([doc])
                if rules.table_required and doc.state != "error":
                    cls.run_tables([doc])
                if rules.metadata_required and doc.state != "error":
                    cls.run_metadata([doc])
                if doc.state != "error":
                    doc.state = "validated"
                    cls._append_note(doc, "Pipeline completed")
            except Exception as e:
                cls._mark_error(doc, "Pipeline", e)
            doc.save()