Files
tradon/modules/automation/automation.py
2026-01-20 21:56:09 +01:00

220 lines
8.8 KiB
Python

from trytond.model import ModelSQL, ModelView, fields, Workflow
from trytond.pool import Pool, PoolMeta
from trytond.pyson import Eval
from trytond.wizard import Button
import requests
import io
import logging
import json
logger = logging.getLogger(__name__)
class AutomationDocument(ModelSQL, ModelView, Workflow):
    """Automation Document"""
    # NOTE(review): the docstring above is left untouched on purpose; Tryton
    # appears to use the model docstring as the model's label - confirm
    # before expanding it.
    #
    # One record tracks an incoming document through the extraction steps
    # exposed as buttons below (OCR, structure, tables, metadata) and stores
    # each step's result in a dedicated Text field.  Every step records its
    # outcome in `notes` and moves `state` forward, or to 'error' on failure.
    __name__ = 'automation.document'

    # Base URL of the HTTP service that performs the extraction steps.
    _service_url = "http://automation-service:8006"
    # (connect, read) timeout in seconds passed to `requests`.  Without a
    # timeout a stalled service would block the calling worker forever; the
    # read timeout is kept generous because extraction can be slow.
    _request_timeout = (10, 300)

    document = fields.Many2One('document.incoming', 'Document')
    type = fields.Selection([
        ('invoice', 'Invoice'),
        ('statement_of_facts', 'Statement of Facts'),
        ('weight_report', 'Weight Report'),
        ('bol', 'Bill of Lading'),
        ('controller_invoice', 'Controller Invoice'),
        ], 'Type')
    state = fields.Selection([
        ('draft', 'Draft'),
        ('ocr_done', 'OCR Done'),
        ('structure_done', 'Structure Done'),
        ('table_done', 'Table Done'),
        ('metadata_done', 'Metadata Done'),
        ('validated', 'Validated'),
        ('error', 'Error'),
        ], 'State', required=True)
    ocr_text = fields.Text('OCR Text')
    structure_json = fields.Text('Structure JSON')
    tables_json = fields.Text('Tables JSON')
    metadata_json = fields.Text('Metadata JSON')
    notes = fields.Text('Notes')
    rule_set = fields.Many2One('automation.rule.set', 'Rule Set')

    @classmethod
    def __setup__(cls):
        """Register the processing buttons on the model."""
        super().__setup__()
        # NOTE(review): 'test' is not one of the declared `state` values, so
        # these conditions never hide the buttons - confirm this is intended.
        cls._buttons.update({
            'run_pipeline': {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                },
            'run_ocr': {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                },
            'run_structure': {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                },
            'run_tables': {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                },
            'run_metadata': {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
                },
            })

    # -------------------------------------------------------
    # Shared helpers
    # -------------------------------------------------------
    @classmethod
    def _post_file(cls, doc, endpoint):
        """Upload the file attached to `doc` and return the decoded JSON reply.

        :param doc: record whose `document` carries `data` and `name`.
        :param endpoint: path appended to the service base URL, e.g. "/ocr".
        :raises requests.exceptions.RequestException: on connection failure,
            timeout or a non-2xx HTTP status.
        """
        # Read the raw file content from the linked document's binary field;
        # an empty payload is sent when no content is stored.
        file_data = doc.document.data or b""
        logger.info("File size: %s bytes", len(file_data))
        logger.info("First 20 bytes: %s", file_data[:20])
        logger.info("Last 20 bytes: %s", file_data[-20:])
        file_name = doc.document.name or "document"
        response = requests.post(
            cls._service_url + endpoint,
            files={"file": (file_name, io.BytesIO(file_data))},
            timeout=cls._request_timeout,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _json_text(value):
        """Return `value` in a form that can be stored in a Text field.

        Strings and None are returned unchanged; any other decoded JSON value
        (dict, list, number, ...) is serialised the same way `run_metadata`
        serialises its result.
        """
        if value is None or isinstance(value, str):
            return value
        return json.dumps(value, indent=4, ensure_ascii=False)

    # -------------------------------------------------------
    # OCR
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_ocr(cls, docs):
        """Send each document's file to the OCR endpoint and store the text.

        On success `ocr_text` is filled and `state` becomes 'ocr_done'; on
        any failure `state` becomes 'error' and the reason is appended to
        `notes`.  The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "/ocr")
                logger.info("RUN_OCR_RESPONSE:%s", data)
                doc.ocr_text = data.get("ocr_text", "")
                doc.state = "ocr_done"
                doc.notes = (doc.notes or "") + "OCR done\n"
            except Exception as e:
                # Best effort: record the failure on the document instead of
                # aborting the rest of the batch.
                logger.exception("OCR error")
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"OCR error: {e}\n"
            doc.save()

    # -------------------------------------------------------
    # STRUCTURE (doctr)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_structure(cls, docs):
        """Send each document's file to the structure endpoint.

        On success `structure_json` is filled and `state` becomes
        'structure_done'; on any failure `state` becomes 'error' and the
        reason is appended to `notes`.  The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "/structure")
                doc.structure_json = cls._json_text(data.get("structure", ""))
                doc.state = "structure_done"
                doc.notes = (doc.notes or "") + "Structure parsing done\n"
            except Exception as e:
                logger.exception("Structure error")
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Structure error: {e}\n"
            doc.save()

    # -------------------------------------------------------
    # TABLES (camelot)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_tables(cls, docs):
        """Send each document's file to the table-extraction endpoint.

        On success `tables_json` is filled and `state` becomes 'table_done';
        on any failure `state` becomes 'error' and the reason is appended to
        `notes`.  The record is saved in both cases.
        """
        for doc in docs:
            try:
                data = cls._post_file(doc, "/tables")
                doc.tables_json = cls._json_text(data.get("tables", ""))
                doc.state = "table_done"
                doc.notes = (doc.notes or "") + "Table extraction done\n"
            except Exception as e:
                logger.exception("Table error")
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Table error: {e}\n"
            doc.save()

    # -------------------------------------------------------
    # METADATA (spaCy)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_metadata(cls, docs):
        """Send each document's OCR text to the parsing endpoint.

        On success the full JSON reply is stored, pretty-printed, in
        `metadata_json` and `state` becomes 'metadata_done'; on failure
        `state` becomes 'error' and the reason is appended to `notes`.
        The record is saved in both cases.
        """
        for doc in docs:
            try:
                logger.info(
                    "Sending OCR text to metadata API: %s", doc.ocr_text)
                # The "/parse" endpoint is used here; an earlier revision
                # targeted "/metadata".
                response = requests.post(
                    cls._service_url + "/parse",
                    json={"text": doc.ocr_text or ""},
                    timeout=cls._request_timeout,
                )
                response.raise_for_status()
                data = response.json()
                # Store the full JSON returned by the API, serialised because
                # the target field is plain text.
                doc.metadata_json = json.dumps(
                    data, indent=4, ensure_ascii=False)
                doc.state = "metadata_done"
                doc.notes = (doc.notes or "") + "Metadata extraction done\n"
            except requests.exceptions.RequestException as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Metadata HTTP error: {e}\n"
                logger.error("Metadata HTTP error: %s", e)
            except Exception as e:
                doc.state = "error"
                doc.notes = (
                    (doc.notes or "") + f"Metadata processing error: {e}\n")
                logger.error("Metadata processing error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # FULL PIPELINE
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_pipeline(cls, docs):
        """Create a weight report from stored metadata and link a shipment.

        For each document, `metadata_json` is decoded and handed to
        `weight.report`'s `create_from_json`.  Incoming shipments whose
        `bl_number` matches the report's `bl_no` (case-insensitive `ilike`)
        are searched; when at least one is found, the first is linked to the
        report through a new `shipment.wr` record and a note is appended.
        Any failure sets `state` to 'error' and appends the reason to
        `notes`.  The record is saved in both cases.
        """
        for doc in docs:
            try:
                logger.info("DATA_TYPE:%s", type(doc.metadata_json))
                metadata = json.loads(str(doc.metadata_json))
                logger.info("JSON STRUCTURE:%s", metadata)
                WeightReport = Pool().get('weight.report')
                wr = WeightReport.create_from_json(metadata)
                ShipmentIn = Pool().get('stock.shipment.in')
                ShipmentWR = Pool().get('shipment.wr')
                shipments = ShipmentIn.search(
                    [('bl_number', 'ilike', wr.bl_no)])
                if shipments:
                    # `search` returns a list of records; only the first
                    # match is linked and reported.
                    shipment = shipments[0]
                    swr = ShipmentWR()
                    swr.shipment_in = shipment
                    swr.wr = wr
                    ShipmentWR.save([swr])
                    doc.notes = (
                        (doc.notes or "")
                        + f"Shipment found: {shipment.number}\n")
                # TODO: the staged chaining (run_ocr -> run_structure ->
                # run_tables -> run_metadata, each gated by a flag on the
                # document's rule_set and followed by a final 'validated'
                # state with a "Pipeline completed" note) is currently
                # disabled.
            except Exception as e:
                logger.exception("Pipeline error")
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Pipeline error: {e}\n"
            doc.save()