This commit is contained in:
2026-01-11 16:46:46 +01:00
parent def61f18a2
commit 3bf842ae0d

47
app.py
View File

@@ -58,21 +58,30 @@ file_handler.setFormatter(logging.Formatter(
class AHKParser:
lab = "AHK"
def grab(self, text, labels):
def extract_table(self, text, headers):
lines = [l.strip() for l in text.splitlines() if l.strip()]
idx = [i for i,l in enumerate(lines) if l in labels]
if not idx:
return {}
out = {}
for h in headers:
for i,l in enumerate(lines):
if l == h:
for j in range(i+1, i+8):
if j < len(lines) and lines[j].startswith(":"):
out[h] = lines[j][1:].strip()
break
return out
values = []
start = idx[-1] + 1
for l in lines[start:]:
if l.startswith(":"):
values.append(l[1:].strip())
if len(values) == len(labels):
break
return dict(zip(labels, values))
def extract_weights(self, text):
    """Extract the landed-weight figures that follow a "Bales Weighed" line.

    The text is split into stripped, non-empty lines. For every line that is
    exactly ``"Bales Weighed"``, the lines within the next 20 positions
    (the marker line included) that start with ``":"`` are read in order and
    paired positionally with the headers ``Bales``, ``Gross``, ``Tare`` and
    ``Net``. The leading colon and any ``"kg"`` text are removed from each
    value.

    NOTE(review): positional pairing assumes the values appear in the same
    order as the headers, as the colon-value/label zip used elsewhere in
    this parser does -- confirm against a real report.

    Args:
        text: Raw report text.

    Returns:
        dict mapping header name to its cleaned string value. Headers for
        which no value was found are absent; the dict is empty when the
        marker line does not occur. If the marker occurs more than once,
        values from later occurrences overwrite earlier ones.
    """
    headers = ("Bales", "Gross", "Tare", "Net")
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    res = {}
    for i, line in enumerate(lines):
        if line != "Bales Weighed":
            continue
        # Gather the colon-prefixed values in document order. The previous
        # implementation restarted the scan for every header and therefore
        # assigned the first value found to all four of them.
        values = [
            candidate[1:].replace("kg", "").strip()
            for candidate in lines[i:i + 20]
            if candidate.startswith(":")
        ]
        # zip() stops at the shorter sequence, so extra values are ignored
        # and missing ones simply leave their header unset.
        res.update(zip(headers, values))
    return res
def parse(self, text):
r = empty_weight_report("AHK")
@@ -85,14 +94,14 @@ class AHKParser:
r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
r["contract"]["commodity"] = "Raw Cotton"
# parties
# buyer
r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
# shipment block
ship = self.grab(text, [
# shipment tables
ship = self.extract_table(text, [
"Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
])
ship2 = self.grab(text, [
ship2 = self.extract_table(text, [
"Growth","Arrival Date","First date of weighing",
"Last Date of Weighing","Weighing method","Tare"
])
@@ -106,8 +115,8 @@ class AHKParser:
r["contract"]["origin"] = ship2.get("Growth")
# weights
inv = self.grab(text, ["Bales","Gross","Tare","Net"])
land = self.grab(section(text,"Bales Weighed","Outturn"),["Bales","Gross","Tare","Net"])
inv = self.extract_table(text, ["Bales","Gross","Tare","Net"])
land = self.extract_weights(text)
r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))