Initial import from Docker volume
This commit is contained in:
201
modules/automation/automation.py
Normal file
201
modules/automation/automation.py
Normal file
@@ -0,0 +1,201 @@
|
||||
from trytond.model import ModelSQL, ModelView, fields, Workflow
|
||||
from trytond.pyson import Eval
|
||||
from trytond.wizard import Button
|
||||
import requests
|
||||
import io
|
||||
import logging
|
||||
import json
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AutomationDocument(ModelSQL, ModelView, Workflow):
    """Automation Document

    Tracks one incoming document through the extraction steps exposed by
    the automation HTTP service: OCR, structure parsing, table extraction
    and metadata extraction. Each step is a button that stores the service
    result on the record, updates ``state`` and appends a line to ``notes``.
    A failing step puts the record in the ``error`` state instead of
    raising, so the failure is visible on the record itself.
    """
    __name__ = 'automation.document'

    # Base URL of the automation service; the endpoint name is appended.
    _service_url = "http://automation-service:8006"
    # (connect, read) timeout in seconds passed to ``requests``. Without a
    # timeout a stalled service would block the worker indefinitely. The
    # read timeout is deliberately generous because extraction can be slow;
    # subclasses may override it.
    _request_timeout = (5, 300)

    document = fields.Many2One('document.incoming', 'Document')
    type = fields.Selection([
        ('invoice', 'Invoice'),
        ('statement_of_facts', 'Statement of Facts'),
        ('weight_report', 'Weight Report'),
        ('bol', 'Bill of Lading'),
        ('controller_invoice', 'Controller Invoice'),
        ], 'Type')

    state = fields.Selection([
        ('draft', 'Draft'),
        ('ocr_done', 'OCR Done'),
        ('structure_done', 'Structure Done'),
        ('table_done', 'Table Done'),
        ('metadata_done', 'Metadata Done'),
        ('validated', 'Validated'),
        ('error', 'Error'),
        ], 'State', required=True)

    ocr_text = fields.Text('OCR Text')
    structure_json = fields.Text('Structure JSON')
    tables_json = fields.Text('Tables JSON')
    metadata_json = fields.Text('Metadata JSON')
    notes = fields.Text('Notes')
    rule_set = fields.Many2One('automation.rule.set', 'Rule Set')

    @classmethod
    def __setup__(cls):
        """Register the pipeline buttons on the model."""
        super().__setup__()
        # NOTE(review): 'test' is not one of the declared states, so this
        # condition never hides a button. Kept as-is; confirm the intended
        # visibility rule.
        button_names = (
            'run_pipeline',
            'run_ocr',
            'run_structure',
            'run_tables',
            'run_metadata',
        )
        cls._buttons.update({
            name: {
                'invisible': Eval('state') == 'test',
                'depends': ['state'],
            }
            for name in button_names
        })

    @staticmethod
    def default_state():
        """Return the initial state so the required field is always set."""
        return 'draft'

    # -------------------------------------------------------
    # Helpers
    # -------------------------------------------------------
    @staticmethod
    def _append_note(doc, message):
        """Append ``message`` as a new line to the notes of ``doc``."""
        doc.notes = (doc.notes or "") + message + "\n"

    @staticmethod
    def _as_text(value):
        """Return ``value`` in a form suitable for a Text field.

        Dict and list payloads are serialized to JSON, the same way
        ``run_metadata`` stores its result; any other value is returned
        unchanged.
        """
        if isinstance(value, (dict, list)):
            return json.dumps(value, indent=4, ensure_ascii=False)
        return value

    @classmethod
    def _upload_document(cls, doc, endpoint):
        """Send the file attached to ``doc`` to ``endpoint``.

        The file content and name are read from ``doc.document``. Returns
        the decoded JSON body of the response.

        Raises whatever ``requests`` raises on connection failure, timeout
        or non-2xx status, and ``AttributeError`` when ``doc`` has no
        linked document; callers turn these into the ``error`` state.
        """
        # Read the file content from the Binary field.
        file_data = doc.document.data or b""
        logger.info("File size: %d bytes", len(file_data))
        logger.info("First 20 bytes: %s", file_data[:20])
        logger.info("Last 20 bytes: %s", file_data[-20:])

        file_name = doc.document.name or "document"

        response = requests.post(
            f"{cls._service_url}/{endpoint}",
            files={"file": (file_name, io.BytesIO(file_data))},
            timeout=cls._request_timeout,
        )
        response.raise_for_status()
        return response.json()

    # -------------------------------------------------------
    # OCR
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_ocr(cls, docs):
        """Run OCR on each document and store the text in ``ocr_text``."""
        for doc in docs:
            try:
                # Send the file to the OCR service.
                data = cls._upload_document(doc, "ocr")
                logger.info("RUN_OCR_RESPONSE:%s", data)
                doc.ocr_text = data.get("ocr_text", "")
                doc.state = "ocr_done"
                cls._append_note(doc, "OCR done")
            except Exception as e:
                logger.exception("OCR error")
                doc.state = "error"
                cls._append_note(doc, f"OCR error: {e}")
            doc.save()

    # -------------------------------------------------------
    # STRUCTURE (doctr)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_structure(cls, docs):
        """Parse each document's structure into ``structure_json``."""
        for doc in docs:
            try:
                data = cls._upload_document(doc, "structure")
                doc.structure_json = cls._as_text(data.get("structure", ""))
                doc.state = "structure_done"
                cls._append_note(doc, "Structure parsing done")
            except Exception as e:
                logger.exception("Structure error")
                doc.state = "error"
                cls._append_note(doc, f"Structure error: {e}")
            doc.save()

    # -------------------------------------------------------
    # TABLES (camelot)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_tables(cls, docs):
        """Extract each document's tables into ``tables_json``."""
        for doc in docs:
            try:
                data = cls._upload_document(doc, "tables")
                doc.tables_json = cls._as_text(data.get("tables", ""))
                doc.state = "table_done"
                cls._append_note(doc, "Table extraction done")
            except Exception as e:
                logger.exception("Table error")
                doc.state = "error"
                cls._append_note(doc, f"Table error: {e}")
            doc.save()

    # -------------------------------------------------------
    # METADATA (spaCy)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_metadata(cls, docs):
        """Extract metadata from ``ocr_text`` into ``metadata_json``.

        The OCR text (empty string when missing) is posted as JSON and the
        complete response is stored as pretty-printed JSON.
        """
        for doc in docs:
            try:
                text = doc.ocr_text or ""
                # The full text can be large, so only its size is logged
                # at info level.
                logger.info(
                    "Sending OCR text to metadata API (%d characters)",
                    len(text))
                logger.debug(
                    "Sending OCR text to metadata API: %s", doc.ocr_text)

                response = requests.post(
                    f"{cls._service_url}/metadata",
                    json={"text": text},
                    timeout=cls._request_timeout,
                )
                response.raise_for_status()
                data = response.json()

                # Store the complete JSON returned by the API.
                doc.metadata_json = json.dumps(
                    data, indent=4, ensure_ascii=False)
                doc.state = "metadata_done"
                cls._append_note(doc, "Metadata extraction done")
            except requests.exceptions.RequestException as e:
                doc.state = "error"
                cls._append_note(doc, f"Metadata HTTP error: {e}")
                logger.error("Metadata HTTP error: %s", e)
            except Exception as e:
                doc.state = "error"
                cls._append_note(doc, f"Metadata processing error: {e}")
                logger.exception("Metadata processing error: %s", e)

            doc.save()

    # -------------------------------------------------------
    # FULL PIPELINE
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_pipeline(cls, docs):
        """Run every step required by each document's rule set.

        Steps run in the order OCR, structure, tables, metadata. Steps
        after OCR are skipped once the document is in the ``error`` state.
        A document that finishes without error is marked ``validated``.
        """
        for doc in docs:
            try:
                # The flags live on the record's rule set. ``cls.rule_set``
                # is the field definition, not a record, and has no such
                # attributes.
                rules = doc.rule_set
                if rules is None:
                    raise ValueError("no rule set configured")

                if rules.ocr_required:
                    cls.run_ocr([doc])
                if rules.structure_required and doc.state != "error":
                    cls.run_structure([doc])
                if rules.table_required and doc.state != "error":
                    cls.run_tables([doc])
                if rules.metadata_required and doc.state != "error":
                    cls.run_metadata([doc])

                if doc.state != "error":
                    doc.state = "validated"
                    cls._append_note(doc, "Pipeline completed")
            except Exception as e:
                logger.exception("Pipeline error")
                doc.state = "error"
                cls._append_note(doc, f"Pipeline error: {e}")
            doc.save()
|
||||
Reference in New Issue
Block a user