This commit is contained in:
2026-01-11 16:34:24 +01:00
parent ba853074d0
commit b9604b3c95

25
app.py
View File

@@ -59,31 +59,33 @@ class AHKParser:
lab = "AHK" lab = "AHK"
# ---------- helpers ---------- # ---------- helpers ----------
def clean(self, t): def norm(self, s):
return " ".join(t.replace("\xa0", " ").split()) return s.replace("\xa0", " ").strip()
def find(self, pattern, text): def find(self, pattern, text):
m = re.search(pattern, text, re.I) m = re.search(pattern, text, re.I)
return self.clean(m.group(1)) if m else None return self.norm(m.group(1)) if m else None
def lines(self, text):
return [self.norm(l) for l in text.splitlines() if self.norm(l)]
def block(self, text, labels): def block(self, text, labels):
lines = [self.clean(l) for l in text.splitlines() if self.clean(l)] L = self.lines(text)
idx = [i for i,l in enumerate(lines) if l in labels] idx = [i for i,l in enumerate(L) if l in labels]
values = [] values = []
for i in range(len(idx)): for i in range(len(idx)):
start = idx[i] start = idx[i]
end = idx[i+1] if i+1 < len(idx) else len(lines) end = idx[i+1] if i+1 < len(idx) else len(L)
for j in range(start, end): for j in range(start, end):
if lines[j].startswith(":"): if L[j].startswith(":"):
values.append(lines[j].lstrip(":").strip()) values.append(L[j][1:].strip())
break break
return dict(zip(labels, values)) return dict(zip(labels, values))
# ---------- parser ---------- # ---------- parser ----------
def parse(self, text): def parse(self, text):
r = empty_weight_report("AHK") r = empty_weight_report("AHK")
text = self.clean(text)
# ---------- report ---------- # ---------- report ----------
r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text) r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text)
@@ -94,10 +96,9 @@ class AHKParser:
r["contract"]["commodity"] = "Raw Cotton" r["contract"]["commodity"] = "Raw Cotton"
# ---------- parties ---------- # ---------- parties ----------
r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text) r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text)
r["parties"]["seller"] = self.find(r"Client\s*Ref No\.\s*:\s*(.+)", text)
# ---------- shipment block ---------- # ---------- shipment ----------
ship = self.block(text, [ ship = self.block(text, [
"Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination", "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination",
"Growth","Arrival Date","First date of weighing","Last Date of Weighing", "Growth","Arrival Date","First date of weighing","Last Date of Weighing",