Initial import from Docker volume

This commit is contained in:
root
2025-12-26 13:11:43 +00:00
commit 4998dc066a
13336 changed files with 1767801 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
from trytond.pool import Pool
from . import automation,rules #, document
def register():
    """Register the models defined by this module with the Tryton pool."""
    models = (
        automation.AutomationDocument,
        rules.AutomationRuleSet,
    )
    Pool.register(*models, module='automation', type_='model')

259
modules/automation/app.py Normal file
View File

@@ -0,0 +1,259 @@
from fastapi import FastAPI, UploadFile, HTTPException, Body
from PIL import Image
import pytesseract
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
from PyPDF2 import PdfReader
import camelot
import spacy
import logging
import io
from logging.handlers import RotatingFileHandler
import re
# Destination of the service log; rotated by the handler configured below.
LOG_PATH = "/var/log/automation-service.log"
# Rotate once the file reaches 10 MiB and keep five old files.
file_handler = RotatingFileHandler(
LOG_PATH,
maxBytes=10*1024*1024,
backupCount=5,
encoding="utf-8"
)
file_handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
))
# Configure the root logger explicitly: INFO level, written to the rotating
# file and to a stream handler.
root = logging.getLogger()
root.setLevel(logging.INFO)
root.addHandler(file_handler)
root.addHandler(logging.StreamHandler())
# Module-level logger named after this module (not the root logger itself);
# its records reach the handlers attached to the root logger above.
logger = logging.getLogger(__name__)
app = FastAPI()
# Models are loaded once at import time so the request handlers can reuse them.
logger.info("Loading models...")
# NOTE(review): `nlp` is not referenced by any handler visible in this file —
# confirm it is still needed before paying its load cost.
nlp = spacy.load("en_core_web_sm")
predictor = ocr_predictor(pretrained=True)
logger.info("Models loaded successfully.")
# =============================
# 🧠 Smart OCR
# =============================
@app.post("/ocr")
async def ocr(file: UploadFile):
    """Extract plain text from an uploaded PDF or image.

    A PDF is first read for an embedded text layer; only when that layer is
    empty are its pages rasterised and passed through Tesseract. Any other
    upload is opened as an image and OCR'd directly.

    Args:
        file: The uploaded document.

    Returns:
        A dict of the form ``{"ocr_text": <str>}``.

    Raises:
        HTTPException: Status 500 carrying the underlying error message when
            any step fails.
    """
    logger.info(f"Received OCR request: {file.filename}")
    try:
        file_data = await file.read()
        # The upload's filename is optional; a missing name is treated as
        # "not a PDF" instead of crashing on None.lower().
        ext = (file.filename or "").lower()

        # --------- PDF with native text ---------
        if ext.endswith(".pdf"):
            logger.info("PDF detected → Extracting native text first")
            reader = PdfReader(io.BytesIO(file_data))
            direct_text = "".join(
                page.extract_text() or "" for page in reader.pages
            )
            if direct_text.strip():
                logger.info("Native PDF text found → No OCR needed")
                return {"ocr_text": direct_text}

            # -------- Fallback: scanned PDF OCR --------
            logger.info("No native text → PDF treated as scanned → OCR")
            # Imported lazily: only scanned PDFs need pdf2image.
            from pdf2image import convert_from_bytes
            images = convert_from_bytes(file_data)
            page_texts = []
            for i, img in enumerate(images):
                logger.info(f"OCR page {i+1}/{len(images)}")
                page_texts.append(pytesseract.image_to_string(img) + "\n")
            # Join once instead of growing a string page by page.
            return {"ocr_text": "".join(page_texts)}

        # --------- Image file OCR ---------
        logger.info("Image detected → Running OCR")
        # The context manager releases the decoder's resources after OCR.
        with Image.open(io.BytesIO(file_data)) as img:
            text = pytesseract.image_to_string(img)
        return {"ocr_text": text}
    except Exception as e:
        logger.error(f"OCR failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# =============================
# 🧱 Structure / Layout
# =============================
@app.post("/structure")
async def structure(file: UploadFile):
    """Run the docTR predictor on an uploaded PDF or image.

    Args:
        file: The uploaded document.

    Returns:
        A dict of the form ``{"structure": <str>}`` holding the string
        rendering of the predictor's result.

    Raises:
        HTTPException: Status 500 carrying the underlying error message when
            any step fails.
    """
    logger.info(f"Received structure request: {file.filename}")
    try:
        file_data = await file.read()
        # The upload's filename is optional; a missing name is treated as
        # "not a PDF" instead of crashing on None.lower().
        ext = (file.filename or "").lower()
        if ext.endswith(".pdf"):
            doc = DocumentFile.from_pdf(file_data)
            logger.info(f"Structure prediction on PDF ({len(doc)} pages)")
        else:
            # DocumentFile.from_images takes file paths or raw bytes, not PIL
            # objects, so the image is normalised to RGB with Pillow and
            # handed over as re-encoded PNG bytes.
            with Image.open(io.BytesIO(file_data)) as img:
                png = io.BytesIO()
                img.convert("RGB").save(png, format="PNG")
            doc = DocumentFile.from_images([png.getvalue()])
            logger.info("Structure prediction on image")
        res = predictor(doc)
        return {"structure": str(res)}
    except Exception as e:
        logger.error(f"Structure extraction failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# =============================
# 📊 Tables extraction (PDF only)
# =============================
@app.post("/tables")
async def tables(file: UploadFile):
    """Extract tables from an uploaded PDF with Camelot.

    Args:
        file: The uploaded PDF.

    Returns:
        A dict of the form ``{"tables": [<DataFrame.to_dict()>, ...]}`` with
        one entry per detected table.

    Raises:
        HTTPException: Status 500 carrying the underlying error message when
            any step fails.
    """
    import os
    import tempfile

    logger.info(f"Received table extraction request: {file.filename}")
    try:
        file_data = await file.read()
        # camelot.read_pdf is documented as taking a file path or URL, so the
        # upload is spooled to a temporary ".pdf" file instead of being passed
        # as an in-memory buffer.
        tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
        try:
            with tmp:
                tmp.write(file_data)
            found = camelot.read_pdf(tmp.name)
            logger.info(f"Found {len(found)} tables")
            return {"tables": [t.df.to_dict() for t in found]}
        finally:
            # delete=False keeps the file after close; remove it ourselves.
            os.unlink(tmp.name)
    except Exception as e:
        logger.error(f"Table extraction failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
def safe_search(pattern, text, default=None, group_index=1, context=""):
    """Return the stripped text of one capture group, or ``default``.

    The pattern is searched case-insensitively with ``.`` matching newlines.
    Every miss is logged as a warning rather than raised.

    Args:
        pattern: Regular expression to search for.
        text: Text to search in.
        default: Value returned when nothing usable is found.
        group_index: Capture group whose text is returned.
        context: Short label used in the warning messages.

    Returns:
        The stripped group text, or ``default`` when the pattern does not
        match, the group does not exist, or the group did not take part in
        the match.
    """
    m = re.search(pattern, text, re.I | re.S)
    if not m:
        logger.warning("Pattern not found for %s: %s", context, pattern)
        return default
    try:
        value = m.group(group_index)
    except IndexError:
        logger.warning("Group index %d not found for %s: %s", group_index, context, pattern)
        return default
    if value is None:
        # An optional group that did not participate yields None; calling
        # .strip() on it would raise AttributeError.
        logger.warning("Group index %d did not match for %s: %s", group_index, context, pattern)
        return default
    return value.strip()
def section(text, start, end=None):
    """Return the stripped text that follows ``start``, up to ``end``.

    Both markers are matched literally and case-insensitively. Without an
    ``end`` marker everything after ``start`` is returned. When the markers
    cannot be found a warning is logged and ``""`` is returned.
    """
    head = re.escape(start)
    if end:
        # Lazy body so the section stops at the first end marker.
        body, tail = r"(.*?)", re.escape(end)
    else:
        body, tail = r"(.*)", ""
    found = re.search(head + body + tail, text, re.S | re.I)
    if found is None:
        logger.warning("Section not found: start='%s', end='%s'", start, end)
        return ""
    return found.group(1).strip()
def extract_field(text, label, default=None):
    """Return the rest of the line that follows ``label`` in ``text``.

    The label is matched literally; an optional colon and at least one
    whitespace character must separate it from the value. ``default`` is
    returned when no such line is found.
    """
    escaped_label = re.escape(label)
    return safe_search(
        escaped_label + r"\s*:?[\s]+([^\n]+)",
        text,
        default=default,
        context=f"field '{label}'",
    )
def extract_report_metadata(text):
    """Parse the text of a weight report into a nested metadata dict.

    The text is cut into sections with ``section()`` and individual values
    are pulled out with ``extract_field()`` / ``safe_search()``. Fields that
    cannot be found are returned as ``None`` (or ``""`` for ``report_number``
    and ``signed_by``).

    Args:
        text: Full report text.

    Returns:
        A dict with the keys ``report``, ``parties``, ``shipment``,
        ``weights`` (``invoice``, ``landed``, ``loss``, ``averages``) and
        ``signature``.

    Raises:
        HTTPException: Status 500 when an unexpected error occurs during
            extraction.
    """
    logger.info("Starting metadata extraction, text length=%d", len(text))

    def pick(source, labels):
        # Build {output key: extracted value} for (key, label) pairs.
        return {key: extract_field(source, label) for key, label in labels}

    try:
        # ----------- SECTIONS -----------
        order_details = section(text, "Order details", "Weights")
        invoice_section = section(text, "INVOICE WEIGHTS", "Bales Weighed")
        landed_section = section(text, "Bales Weighed", "Outturn")
        loss_section = section(text, "LOSS", "Invoice average")
        avg_section = section(text, "Invoice average", "Comments")
        signature_block = section(text, "Signed on")

        # ----------- TOP INFO -----------
        top_info = pick(text, (
            ("produced_on", "Produced On"),
            ("printed_date", "Printed Date"),
            ("client_reference", "Client Reference"),
        ))
        top_info["report_number"] = safe_search(
            r"(AHK\S+)", text, default="", context="report_number", group_index=1)

        # ----------- ORDER DETAILS -----------
        parties = pick(order_details, (
            ("client", "Client"),
            ("client_ref_no", "Client Ref No"),
            ("buyer", "Buyer"),
            ("destination", "Destination"),
        ))
        shipment = pick(order_details, (
            ("total_bales", "Total Bales"),
            ("vessel", "Vessel"),
            ("voyage_no", "Voy. No"),
            ("bl_no", "B/L No"),
            ("bl_date", "B/L Date"),
            ("growth", "Growth"),
            ("arrival_date", "Arrival Date"),
            ("first_weighing_date", "First date of weighing"),
            ("last_weighing_date", "Last Date of Weighing"),
            ("weighing_method", "Weighing method"),
            ("tare_basis", "Tare"),
        ))

        # ----------- INVOICE / LANDED SECTIONS -----------
        weight_labels = (
            ("bales", "Bales"),
            ("gross", "Gross"),
            ("tare", "Tare"),
            ("net", "Net"),
        )
        invoice = pick(invoice_section, weight_labels)
        # NOTE(review): landed_section starts after the "Bales Weighed"
        # heading, so "Bales" is only found if that label occurs again inside
        # the section — confirm against a sample report.
        landed = pick(landed_section, weight_labels)

        # ----------- LOSS SECTION -----------
        loss = pick(loss_section, (
            ("kg", "kg"),
            ("lb", "lb"),
            ("percent", "Percentage"),
        ))

        # ----------- AVERAGES SECTION -----------
        averages = {
            # section() drops its start marker, so "Invoice average" is not
            # part of avg_section; look the label up in the full text.
            "invoice_gross_per_bale": extract_field(text, "Invoice average"),
            "landed_gross_per_bale": extract_field(avg_section, "Landed average"),
        }

        # ----------- SIGNATURE -----------
        signature = {
            # Same reason as above: "Signed on" is the marker that was
            # stripped from signature_block.
            "signed_on": extract_field(text, "Signed on"),
            "signed_by": safe_search(
                r"\n([A-Za-z ]+)\nClient Services", signature_block,
                default="", context="signed_by"),
            "role": "Client Services Coordinator",
            "company": "Alfred H. Knight International Limited"
        }

        logger.info("Metadata extraction completed successfully")
        return {
            "report": top_info,
            "parties": parties,
            "shipment": shipment,
            "weights": {
                "invoice": invoice,
                "landed": landed,
                "loss": loss,
                "averages": averages
            },
            "signature": signature
        }
    except Exception as e:
        logger.exception("Unexpected error during metadata extraction")
        raise HTTPException(status_code=500, detail=f"Metadata extraction failed: {e}")
@app.post("/metadata")
async def metadata(text: str = Body(..., embed=True)):
    """Parse report metadata from the ``text`` field of the JSON body."""
    parsed = extract_report_metadata(text)
    return parsed

View File

@@ -0,0 +1,201 @@
from trytond.model import ModelSQL, ModelView, fields, Workflow
from trytond.pyson import Eval
from trytond.wizard import Button
import requests
import io
import logging
import json
logger = logging.getLogger(__name__)
class AutomationDocument(ModelSQL, ModelView, Workflow):
    """Automation Document"""
    __name__ = 'automation.document'

    # Base URL of the HTTP service called by the run_* buttons.
    _SERVICE_URL = "http://automation-service:8006"
    # (connect, read) timeouts in seconds, so an unreachable or stuck service
    # cannot block the request forever.
    _SERVICE_TIMEOUT = (10, 600)

    document = fields.Many2One('document.incoming', 'Document')
    type = fields.Selection([
        ('invoice', 'Invoice'),
        ('statement_of_facts', 'Statement of Facts'),
        ('weight_report', 'Weight Report'),
        ('bol', 'Bill of Lading'),
        ('controller_invoice', 'Controller Invoice'),
        ], 'Type')
    state = fields.Selection([
        ('draft', 'Draft'),
        ('ocr_done', 'OCR Done'),
        ('structure_done', 'Structure Done'),
        ('table_done', 'Table Done'),
        ('metadata_done', 'Metadata Done'),
        ('validated', 'Validated'),
        ('error', 'Error'),
        ], 'State', required=True)
    ocr_text = fields.Text('OCR Text')
    structure_json = fields.Text('Structure JSON')
    tables_json = fields.Text('Tables JSON')
    metadata_json = fields.Text('Metadata JSON')
    notes = fields.Text('Notes')
    rule_set = fields.Many2One('automation.rule.set', 'Rule Set')

    @classmethod
    def __setup__(cls):
        super().__setup__()
        # NOTE(review): 'test' is not one of the declared states, so these
        # buttons are never hidden — confirm whether that is intended.
        cls._buttons.update({
            'run_pipeline': {'invisible': Eval('state') == 'test', 'depends': ['state']},
            'run_ocr': {'invisible': Eval('state') == 'test', 'depends': ['state']},
            'run_structure': {'invisible': Eval('state') == 'test', 'depends': ['state']},
            'run_tables': {'invisible': Eval('state') == 'test', 'depends': ['state']},
            'run_metadata': {'invisible': Eval('state') == 'test', 'depends': ['state']},
            })

    @classmethod
    def default_state(cls):
        """Start new records in 'draft'; `state` is required."""
        return 'draft'

    @classmethod
    def _post_file(cls, doc, endpoint):
        """Upload the attached file to ``endpoint`` and return the JSON reply.

        Raises whatever ``requests`` raises on connection errors, timeouts or
        non-2xx responses; callers turn that into an 'error' state.
        """
        file_data = doc.document.data or b""
        logger.info("File size: %d bytes", len(file_data))
        logger.info("First 20 bytes: %s", file_data[:20])
        logger.info("Last 20 bytes: %s", file_data[-20:])
        file_name = doc.document.name or "document"
        response = requests.post(
            cls._SERVICE_URL + endpoint,
            files={"file": (file_name, io.BytesIO(file_data))},
            timeout=cls._SERVICE_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    # -------------------------------------------------------
    # OCR
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_ocr(cls, docs):
        """Send each document to the OCR endpoint and store the text."""
        for doc in docs:
            try:
                data = cls._post_file(doc, "/ocr")
                logger.info("RUN_OCR_RESPONSE:%s", data)
                doc.ocr_text = data.get("ocr_text", "")
                doc.state = "ocr_done"
                doc.notes = (doc.notes or "") + "OCR done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"OCR error: {e}\n"
                logger.error("OCR error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # STRUCTURE (doctr)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_structure(cls, docs):
        """Send each document to the structure endpoint and store the reply."""
        for doc in docs:
            try:
                data = cls._post_file(doc, "/structure")
                doc.structure_json = data.get("structure", "")
                doc.state = "structure_done"
                doc.notes = (doc.notes or "") + "Structure parsing done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Structure error: {e}\n"
                logger.error("Structure error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # TABLES (camelot)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_tables(cls, docs):
        """Send each document to the tables endpoint and store the reply."""
        for doc in docs:
            try:
                data = cls._post_file(doc, "/tables")
                # The service returns a list of tables; a Text field needs a
                # string, so serialise it the same way as metadata_json.
                doc.tables_json = json.dumps(
                    data.get("tables", []), indent=4, ensure_ascii=False)
                doc.state = "table_done"
                doc.notes = (doc.notes or "") + "Table extraction done\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Table error: {e}\n"
                logger.error("Table error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # METADATA (spaCy)
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_metadata(cls, docs):
        """Send each document's OCR text to the metadata endpoint."""
        for doc in docs:
            try:
                logger.info("Sending OCR text to metadata API: %s", doc.ocr_text)
                response = requests.post(
                    cls._SERVICE_URL + "/metadata",
                    json={"text": doc.ocr_text or ""},
                    timeout=cls._SERVICE_TIMEOUT,
                )
                response.raise_for_status()
                data = response.json()
                # Store the complete JSON returned by the API as text.
                doc.metadata_json = json.dumps(data, indent=4, ensure_ascii=False)
                doc.state = "metadata_done"
                doc.notes = (doc.notes or "") + "Metadata extraction done\n"
            except requests.exceptions.RequestException as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Metadata HTTP error: {e}\n"
                logger.error("Metadata HTTP error: %s", e)
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Metadata processing error: {e}\n"
                logger.error("Metadata processing error: %s", e)
            doc.save()

    # -------------------------------------------------------
    # FULL PIPELINE
    # -------------------------------------------------------
    @classmethod
    @ModelView.button
    def run_pipeline(cls, docs):
        """Run the steps enabled on each document's rule set, in order.

        Steps run as OCR, structure, tables, metadata; the first step that
        leaves the document in 'error' stops the run. A document whose steps
        all succeed ends in 'validated'. A document without a rule set is
        marked as an error.
        """
        for doc in docs:
            # Read the flags from the record's rule set; `cls.rule_set` is the
            # field definition and carries no *_required values.
            rule_set = doc.rule_set
            if rule_set is None:
                doc.state = "error"
                doc.notes = (doc.notes or "") + "Pipeline error: no rule set selected\n"
                doc.save()
                continue
            try:
                steps = (
                    (rule_set.ocr_required, cls.run_ocr),
                    (rule_set.structure_required, cls.run_structure),
                    (rule_set.table_required, cls.run_tables),
                    (rule_set.metadata_required, cls.run_metadata),
                )
                failed = False
                for required, step in steps:
                    if not required:
                        continue
                    step([doc])
                    # Judge failure by what this step produced, so an 'error'
                    # left over from an earlier run does not block a retry.
                    if doc.state == "error":
                        failed = True
                        break
                if not failed:
                    doc.state = "validated"
                    doc.notes = (doc.notes or "") + "Pipeline completed\n"
            except Exception as e:
                doc.state = "error"
                doc.notes = (doc.notes or "") + f"Pipeline error: {e}\n"
                logger.error("Pipeline error: %s", e)
            doc.save()

View File

@@ -0,0 +1,119 @@
<?xml version="1.0"?>
<!-- Data records for the automation module: groups, icon, views, window action, access rules, buttons and menu entries. -->
<tryton>
<data>
<!-- User groups: "Automation" and an administration group that has it as parent. -->
<record model="res.group" id="group_automation">
<field name="name">Automation</field>
</record>
<record model="res.group" id="group_automation_admin">
<field name="name">Automation Administration</field>
<field name="parent" ref="group_automation"/>
</record>
<!-- Add the admin user to both groups. -->
<record model="res.user-res.group" id="user_admin_group_automation">
<field name="user" ref="res.user_admin"/>
<field name="group" ref="group_automation"/>
</record>
<record model="res.user-res.group" id="user_admin_group_automation_admin">
<field name="user" ref="res.user_admin"/>
<field name="group" ref="group_automation_admin"/>
</record>
<!-- Icon used by the top-level menu entry. -->
<record model="ir.ui.icon" id="automation_icon">
<field name="name">tradon-automation</field>
<field name="path">icons/tradon-automation.svg</field>
</record>
<!-- Form and list views for automation.document and automation.rule.set. -->
<record model="ir.ui.view" id="automation_document_form">
<field name="model">automation.document</field>
<field name="type">form</field>
<field name="name">automation_document_form</field>
</record>
<record model="ir.ui.view" id="automation_document_list">
<field name="model">automation.document</field>
<field name="type">tree</field>
<field name="name">automation_document_list</field>
</record>
<record model="ir.ui.view" id="automation_rule_set_form">
<field name="model">automation.rule.set</field>
<field name="type">form</field>
<field name="name">automation_rule_set_form</field>
</record>
<record model="ir.ui.view" id="automation_rule_set_list">
<field name="model">automation.rule.set</field>
<field name="type">tree</field>
<field name="name">automation_rule_set_list</field>
</record>
<!-- Window action on automation.document: list view (sequence 10), then form view (sequence 20). -->
<record model="ir.action.act_window" id="act_automation_form">
<field name="name">Automation</field>
<field name="res_model">automation.document</field>
</record>
<record model="ir.action.act_window.view" id="act_automation_form_view1">
<field name="sequence" eval="10"/>
<field name="view" ref="automation_document_list"/>
<field name="act_window" ref="act_automation_form"/>
</record>
<record model="ir.action.act_window.view" id="act_automation_form_view2">
<field name="sequence" eval="20"/>
<field name="view" ref="automation_document_form"/>
<field name="act_window" ref="act_automation_form"/>
</record>
<!-- Model access for automation.document: the record without a group sets every permission to False; the Automation group gets all four. -->
<record model="ir.model.access" id="access_automation">
<field name="model">automation.document</field>
<field name="perm_read" eval="False"/>
<field name="perm_write" eval="False"/>
<field name="perm_create" eval="False"/>
<field name="perm_delete" eval="False"/>
</record>
<record model="ir.model.access" id="access_automation_automation">
<field name="model">automation.document</field>
<field name="group" ref="group_automation"/>
<field name="perm_read" eval="True"/>
<field name="perm_write" eval="True"/>
<field name="perm_create" eval="True"/>
<field name="perm_delete" eval="True"/>
</record>
<!-- Buttons of automation.document; each name matches a run_* method of the model. -->
<record model="ir.model.button" id="auto_button1">
<field name="model">automation.document</field>
<field name="name">run_pipeline</field>
<field name="string">Run Full Pipeline</field>
</record>
<record model="ir.model.button" id="auto_button2">
<field name="model">automation.document</field>
<field name="name">run_ocr</field>
<field name="string">Run OCR</field>
</record>
<record model="ir.model.button" id="auto_button3">
<field name="model">automation.document</field>
<field name="name">run_structure</field>
<field name="string">Run Structure</field>
</record>
<record model="ir.model.button" id="auto_button4">
<field name="model">automation.document</field>
<field name="name">run_tables</field>
<field name="string">Run Tables</field>
</record>
<record model="ir.model.button" id="auto_button5">
<field name="model">automation.document</field>
<field name="name">run_metadata</field>
<field name="string">Run Metadata</field>
</record>
<!-- Menu: top-level "Automation" entry with a "Documents" child that opens the window action. -->
<menuitem
name="Automation"
sequence="2"
id="menu_automation"
icon="tradon-automation" />
<menuitem
name="Documents"
action="act_automation_form"
parent="menu_automation"
sequence="10"
id="menu_documents" />
<!-- TODO(review): the disabled "Rule Sets" entry below refers to menu_rule and act_forex_bi, neither of which is defined in this file; fix both ids before enabling it. -->
<!-- <menuitem
name="Rule Sets"
parent="menu_rule"
action="act_forex_bi"
sequence="20"
id="menu_forex_bi"/> -->
</data>
</tryton>

View File

@@ -0,0 +1 @@
# Document management (placeholder)

View File

@@ -0,0 +1,16 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
<!-- 24x24 icon: a transparent background path plus one filled path built from rectangles and two small arcs; presumably a stylised robot head (TODO confirm). -->
<path d="M0 0h24v24H0z" fill="none"/>
<path fill="#267F82" d="
M11 2 h2 v2 h-2 z
M7 6 h10 v10 h-10 z
M10 10 a1 1 0 1 1 0 .01 z
M14 10 a1 1 0 1 1 0 .01 z
M9 14 h6 v1.8 h-6 z
M5 9 h2 v4 h-2 z
M17 9 h2 v4 h-2 z
"/>
</svg>

After

Width:  |  Height:  |  Size: 629 B

View File

@@ -0,0 +1,6 @@
# Tryton module (same version window that setup.py derives from tryton.cfg)
trytond>=7.2,<7.3
# HTTP client used by the automation.document buttons
requests
# OCR / parsing web service
fastapi
# required by FastAPI to receive uploaded files
python-multipart
pytesseract
python-doctr
camelot-py[cv]
PyPDF2
# rasterises scanned PDFs before OCR
pdf2image
spacy
Pillow

View File

@@ -0,0 +1,14 @@
from trytond.model import ModelSQL, ModelView, fields
class AutomationRuleSet(ModelSQL, ModelView):
    'Automation Rule Set'
    __name__ = 'automation.rule.set'

    # Identification of the rule set.
    name = fields.Char('Name')
    document_type = fields.Char('Document Type')

    # One flag per processing step.
    ocr_required = fields.Boolean('OCR Required')
    structure_required = fields.Boolean('Structure Required')
    table_required = fields.Boolean('Table Required')
    metadata_required = fields.Boolean('Metadata Required')

    # Free-text hooks; nothing visible in this module reads them.
    python_hook_pre = fields.Text('Python Hook Pre')
    python_hook_post = fields.Text('Python Hook Post')

123
modules/automation/setup.py Normal file
View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
# This file is part of Tryton. The COPYRIGHT file at the top level of
# this repository contains the full copyright notices and license terms.
import io
import os
import re
from configparser import ConfigParser
from setuptools import find_packages, setup
def read(fname):
    """Return the UTF-8 text of ``fname``, resolved next to this file."""
    path = os.path.join(os.path.dirname(__file__), fname)
    # The context manager closes the handle instead of leaking it.
    with io.open(path, 'r', encoding='utf-8') as f:
        return f.read()
def get_require_version(name):
    """Return a requirement string pinning ``name`` to the current series.

    The result has the form ``"<name> >= X.Y, < X.(Y+1)"`` where X and Y are
    the module-level ``major_version`` and ``minor_version``.
    """
    lower = '%s.%s' % (major_version, minor_version)
    upper = '%s.%s' % (major_version, minor_version + 1)
    return '%s >= %s, < %s' % (name, lower, upper)
# Read the [tryton] section of tryton.cfg, which sits next to this file.
config = ConfigParser()
# The context manager closes the file instead of leaking the handle.
with open(os.path.join(os.path.dirname(__file__), 'tryton.cfg')) as cfg_file:
    config.read_file(cfg_file)
info = dict(config.items('tryton'))
# These options hold one entry per line; turn them into lists.
for key in ('depends', 'extras_depend', 'xml'):
    if key in info:
        info[key] = info[key].strip().splitlines()

version = info.get('version', '0.0.1')
major_version, minor_version, _ = version.split('.', 2)
major_version = int(major_version)
minor_version = int(minor_version)
name = 'trytond_automation'

# Only even minor versions get a download URL.
if minor_version % 2:
    download_url = ''
else:
    download_url = 'http://downloads.tryton.org/%s.%s/' % (
        major_version, minor_version)

# Every dependency except ir/res becomes a trytond_<module> requirement in
# the same version series, followed by trytond itself.
requires = [
    get_require_version('trytond_%s' % dep)
    for dep in info.get('depends', [])
    if not re.match(r'(ir|res)(\W|$)', dep)
]
requires.append(get_require_version('trytond'))
# Package metadata for the automation module.
setup(name=name,
    version=version,
    # The previous text ("add period to product") described another module.
    description='Tryton module for automated document processing',
    author='Tryton',
    author_email='foundation@tryton.org',
    url='http://www.tryton.org/',
    download_url=download_url,
    project_urls={
        "Bug Tracker": 'https://bugs.tryton.org/',
        "Documentation": 'https://docs.tryton.org/',
        "Forum": 'https://www.tryton.org/forum',
        "Source Code": 'https://code.tryton.org/tryton',
        },
    keywords='tryton automation',
    package_dir={'trytond.modules.automation': '.'},
    packages=(
        ['trytond.modules.automation']
        + ['trytond.modules.automation.%s' % p
            for p in find_packages()]
        ),
    package_data={
        'trytond.modules.automation': (info.get('xml', [])
            + ['tryton.cfg', 'view/*.xml', 'locale/*.po']),
        },
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Plugins',
        'Framework :: Tryton',
        'Intended Audience :: Developers',
        'Intended Audience :: Financial and Insurance Industry',
        'Intended Audience :: Legal Industry',
        'Intended Audience :: Manufacturing',
        'License :: OSI Approved :: '
        'GNU General Public License v3 or later (GPLv3+)',
        'Natural Language :: Bulgarian',
        'Natural Language :: Catalan',
        'Natural Language :: Chinese (Simplified)',
        'Natural Language :: Czech',
        'Natural Language :: Dutch',
        'Natural Language :: English',
        'Natural Language :: Finnish',
        'Natural Language :: French',
        'Natural Language :: German',
        'Natural Language :: Hungarian',
        'Natural Language :: Indonesian',
        'Natural Language :: Italian',
        'Natural Language :: Persian',
        'Natural Language :: Polish',
        'Natural Language :: Portuguese (Brazilian)',
        'Natural Language :: Romanian',
        'Natural Language :: Russian',
        'Natural Language :: Slovenian',
        'Natural Language :: Spanish',
        'Natural Language :: Turkish',
        'Natural Language :: Ukrainian',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: Office/Business',
        ],
    license='GPL-3',
    python_requires='>=3.8',
    # The model code imports `requests` to call the automation service.
    install_requires=requires + ['requests'],
    zip_safe=False,
    entry_points="""
    [trytond.modules]
    automation = trytond.modules.automation
    """,
    )

View File

@@ -0,0 +1,8 @@
# Manifest of the automation module.
[tryton]
version=7.2.7
# Modules this module depends on, one per line.
depends:
ir
res
document_incoming
# XML data files shipped with the module, one per line.
xml:
automation.xml

View File

@@ -0,0 +1,38 @@
<?xml version='1.0'?>
<!-- Form view: header fields, one notebook page per text field, then state and action buttons. -->
<form>
<!-- Header: source document, document type and rule set. -->
<group col="2" id="he">
<label name="document"/>
<field name="document"/>
<label name="type"/>
<field name="type"/>
<label name="rule_set"/>
<field name="rule_set"/>
</group>
<!-- One page each for the OCR text, structure, tables, metadata and notes fields. -->
<notebook>
<page string="OCR" id="ocr">
<field name="ocr_text"/>
</page>
<page string="JSON" id="json">
<field name="structure_json"/>
</page>
<page string="Tables" id="tables">
<field name="tables_json"/>
</page>
<page string="Metadata" id="meta">
<field name="metadata_json"/>
</page>
<page string="Notes" id="notes">
<field name="notes"/>
</page>
</notebook>
<!-- Footer: current state, the full-pipeline button, then one button per step. -->
<group id="fo">
<label name="state"/>
<field name="state"/>
<button name="run_pipeline"/>
<newline/>
<button name="run_ocr"/>
<button name="run_structure"/>
<button name="run_tables"/>
<button name="run_metadata"/>
</group>
</form>

View File

@@ -0,0 +1,5 @@
<!-- List view with three columns: document, type and rule set. -->
<tree>
<field name="document"/>
<field name="type"/>
<field name="rule_set"/>
</tree>

View File

@@ -0,0 +1,12 @@
<!-- Rule set form: every field is paired with its label, as in the document form. -->
<form>
    <group col="2" id="he">
        <label name="name"/>
        <field name="name"/>
        <label name="document_type"/>
        <field name="document_type"/>
        <label name="ocr_required"/>
        <field name="ocr_required"/>
        <label name="structure_required"/>
        <field name="structure_required"/>
        <label name="table_required"/>
        <field name="table_required"/>
        <label name="metadata_required"/>
        <field name="metadata_required"/>
        <label name="python_hook_pre"/>
        <field name="python_hook_pre" widget="text"/>
        <label name="python_hook_post"/>
        <field name="python_hook_post" widget="text"/>
    </group>
</form>

View File

@@ -0,0 +1 @@
# Central pipeline logic (already integrated into automation.py)