diff --git a/app.py b/app.py
index da09da0..42d2693 100644
--- a/app.py
+++ b/app.py
@@ -23,113 +23,6 @@ file_handler.setFormatter(logging.Formatter(
     "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
 ))
 
-# class AHKParser:
-#     lab="AHK"
-#     def parse(self,text):
-#         r=empty_weight_report("AHK")
-#         inv=section(text,"INVOICE WEIGHTS","Bales Weighed")
-#         land=section(text,"Bales Weighed","Outturn")
-#         loss=section(text,"LOSS","Invoice average")
-
-#         r["report"]["reference"]=safe_search(r"(AHK\s*/\S+)",text)
-#         r["report"]["date"]=extract("Produced On",text)
-
-#         r["contract"]["invoice_no"]=extract("Client Reference",text)
-#         r["contract"]["origin"]=extract("Growth",text)
-#         r["contract"]["commodity"]="Raw Cotton"
-
-#         r["parties"]["seller"]=extract("Client",text)
-#         r["parties"]["buyer"]=extract("Buyer",text)
-
-#         r["shipment"]["vessel"]=extract("Vessel",text)
-#         r["shipment"]["bl_no"]=extract("B/L No",text)
-#         r["shipment"]["port_destination"]=extract("Destination",text)
-#         r["shipment"]["arrival_date"]=extract("Arrival Date",text)
-#         r["shipment"]["weighing_method"]=extract("Weighing method",text)
-#         r["shipment"]["bales"]=to_float(extract("Total Bales",text))
-
-#         r["weights"]["gross_landed_kg"]=to_float(extract("Gross",land))
-#         r["weights"]["tare_kg"]=to_float(extract("Tare",land))
-#         r["weights"]["net_landed_kg"]=to_float(extract("Net",land))
-#         r["weights"]["invoice_net_kg"]=to_float(extract("Net",inv))
-#         r["weights"]["gain_loss_kg"]=to_float(extract("kg",loss))
-#         r["weights"]["gain_loss_percent"]=to_float(extract("Percentage",loss))
-#         return r
-# class AHKParser:
-#     lab = "AHK"
-
-#     def extract_table(self, text, headers):
-#         lines = [l.strip() for l in text.splitlines() if l.strip()]
-#         out = {}
-#         for h in headers:
-#             for i,l in enumerate(lines):
-#                 if l == h:
-#                     for j in range(i+1, i+8):
-#                         if j < len(lines) and lines[j].startswith(":"):
-#                             out[h] = lines[j][1:].strip()
-#                             break
-#         return out
-
-#     def extract_weights(self, text):
-#         lines = [l.strip() for l in text.splitlines() if l.strip()]
-#         res = {}
-#         for i,l in enumerate(lines):
-#             if l == "Bales Weighed":
-#                 headers = ["Bales","Gross","Tare","Net"]
-#                 for h in headers:
-#                     for j in range(i, i+20):
-#                         if j < len(lines) and lines[j].startswith(":"):
-#                             res[h] = lines[j][1:].replace("kg","").strip()
-#                             break
-#         return res
-
-#     def parse(self, text):
-#         r = empty_weight_report("AHK")
-
-#         # report
-#         r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
-#         r["report"]["date"]      = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)
-
-#         # contract
-#         r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
-#         r["contract"]["commodity"]  = "Raw Cotton"
-
-#         # buyer
-#         r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
-
-#         # shipment tables
-#         ship = self.extract_table(text, [
-#             "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
-#         ])
-#         ship2 = self.extract_table(text, [
-#             "Growth","Arrival Date","First date of weighing",
-#             "Last Date of Weighing","Weighing method","Tare"
-#         ])
-
-#         r["shipment"]["bales"]            = to_float(ship.get("Total Bales"))
-#         r["shipment"]["vessel"]           = ship.get("Vessel")
-#         r["shipment"]["bl_no"]            = ship.get("B/L No.")
-#         r["shipment"]["port_destination"] = ship.get("Destination")
-#         r["shipment"]["arrival_date"]     = ship2.get("Arrival Date")
-#         r["shipment"]["weighing_method"]  = ship2.get("Weighing method")
-#         r["contract"]["origin"]           = ship2.get("Growth")
-
-#         # weights
-#         inv  = self.extract_table(text, ["Bales","Gross","Tare","Net"])
-#         land = self.extract_weights(text)
-
-#         r["weights"]["invoice_net_kg"]  = to_float(inv.get("Net"))
-#         r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
-#         r["weights"]["tare_kg"]         = to_float(land.get("Tare"))
-#         r["weights"]["net_landed_kg"]   = to_float(land.get("Net"))
-
-#         # loss
-#         loss = section(text,"LOSS","Invoice average")
-#         r["weights"]["gain_loss_kg"]      = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss))
-#         r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
-
-#         return r
-
 class AHKParser:
     lab = "AHK"
 
@@ -205,7 +98,6 @@ class AHKParser:
 
         return r
 
-
 class IntertekParser:
     lab="INTERTEK"
     def parse(self,text):
@@ -354,221 +246,6 @@ predictor = ocr_predictor(pretrained=True)
 
 logger.info("Models loaded successfully.")
 
-import io
-import re
-from datetime import datetime
-from typing import Dict, Any
-import pytesseract
-from pdf2image import convert_from_bytes
-from PIL import Image
-from PyPDF2 import PdfReader
-import json
-
-def parse_cotton_report(ocr_text: str) -> Dict[str, Any]:
-    """
-    Parse structured data from cotton landing report OCR text
-    """
-    result = {
-        "lab": "ALFRED H KNIGHT",
-        "report": {"reference": None, "file_no": None, "date": None},
-        "contract": {"contract_no": None, "invoice_no": None, "lc_no": None, 
-                    "origin": None, "commodity": None},
-        "parties": {"seller": None, "buyer": None, "carrier": None},
-        "shipment": {
-            "vessel": None, "bl_no": None, "port_loading": None,
-            "port_destination": None, "arrival_date": None,
-            "weighing_place": None, "weighing_method": None,
-            "bales": None
-        },
-        "weights": {
-            "gross_landed_kg": None, "tare_kg": None,
-            "net_landed_kg": None, "invoice_net_kg": None,
-            "gain_loss_kg": None, "gain_loss_percent": None
-        }
-    }
-    
-    # Clean the text
-    lines = ocr_text.split('\n')
-    clean_lines = [line.strip() for line in lines if line.strip()]
-    
-    # Extract using patterns
-    text = ocr_text.lower()
-    
-    # 1. Extract report reference and file number
-    ref_match = re.search(r'client reference:\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if ref_match:
-        result["report"]["reference"] = ref_match.group(1).strip()
-    
-    # Try to get file number from AHK reference
-    ahk_match = re.search(r'ahk s/(\d+)/', ocr_text, re.IGNORECASE)
-    if ahk_match:
-        result["report"]["file_no"] = ahk_match.group(1)
-    
-    # 2. Extract dates
-    date_match = re.search(r'printed date:\s*(\d{1,2}-[a-z]+-\d{4})', text, re.IGNORECASE)
-    if date_match:
-        result["report"]["date"] = date_match.group(1).title()
-    
-    # 3. Extract contract information
-    # Origin/Growth
-    growth_match = re.search(r'growth\s*:?\s*([^\n:]+(?:raw cotton)?)', ocr_text, re.IGNORECASE)
-    if growth_match:
-        origin = growth_match.group(1).strip()
-        result["contract"]["origin"] = origin
-        result["contract"]["commodity"] = "COTTON"
-    
-    # Invoice number from reference
-    if result["report"]["reference"]:
-        inv_match = re.search(r'inv\s*(\d+)', result["report"]["reference"], re.IGNORECASE)
-        if inv_match:
-            result["contract"]["invoice_no"] = inv_match.group(1)
-    
-    # 4. Extract parties
-    # Seller
-    seller_match = re.search(r'client\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if seller_match:
-        # Skip the "Client" label if present
-        seller_text = seller_match.group(1).strip()
-        if not seller_text.lower().startswith('client'):
-            result["parties"]["seller"] = seller_text
-    
-    # Buyer
-    buyer_match = re.search(r'buyer\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if buyer_match:
-        buyer_text = buyer_match.group(1).strip()
-        if not buyer_text.lower().startswith('buyer'):
-            result["parties"]["buyer"] = buyer_text
-    
-    # 5. Extract shipment details
-    # Vessel
-    vessel_match = re.search(r'vessel\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if vessel_match:
-        vessel_text = vessel_match.group(1).strip()
-        if not vessel_text.lower().startswith('vessel'):
-            result["shipment"]["vessel"] = vessel_text
-    
-    # B/L Number
-    bl_match = re.search(r'b/l no\.?\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if bl_match:
-        bl_text = bl_match.group(1).strip()
-        result["shipment"]["bl_no"] = bl_text
-    
-    # Destination
-    dest_match = re.search(r'destination\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if dest_match:
-        dest_text = dest_match.group(1).strip()
-        if not dest_text.lower().startswith('destination'):
-            result["shipment"]["port_destination"] = dest_text
-    
-    # Arrival Date
-    arrival_match = re.search(r'arrival date\s*:?\s*(\d{1,2}-[a-z]+-\d{4})', text, re.IGNORECASE)
-    if arrival_match:
-        result["shipment"]["arrival_date"] = arrival_match.group(1).title()
-    
-    # Weighing method
-    weigh_match = re.search(r'weighing method\s*:?\s*([^\n]+)', ocr_text, re.IGNORECASE)
-    if weigh_match:
-        method_text = weigh_match.group(1).strip()
-        if not method_text.lower().startswith('weighing'):
-            result["shipment"]["weighing_method"] = method_text
-    
-    # Bales count
-    bales_match = re.search(r'total bales\s*:?\s*(\d+)', ocr_text, re.IGNORECASE)
-    if bales_match:
-        result["shipment"]["bales"] = int(bales_match.group(1))
-    
-    # 6. Extract weights (critical section)
-    # Gross Landed Weight
-    gross_match = re.search(r'gross\s*:?\s*([\d,\.]+)\s*kg', ocr_text)
-    if gross_match:
-        # We need the second occurrence (landed weight)
-        all_gross = re.findall(r'gross\s*:?\s*([\d,\.]+)\s*kg', ocr_text)
-        if len(all_gross) >= 2:
-            result["weights"]["gross_landed_kg"] = float(all_gross[1].replace(',', ''))
-    
-    # Tare weight (should be same in both)
-    tare_match = re.search(r'tare\s*:?\s*([\d,\.]+)\s*kg', ocr_text)
-    if tare_match:
-        result["weights"]["tare_kg"] = float(tare_match.group(1).replace(',', ''))
-    
-    # Net weights
-    net_matches = re.findall(r'net\s*:?\s*([\d,\.]+)\s*kg', ocr_text)
-    if len(net_matches) >= 2:
-        result["weights"]["invoice_net_kg"] = float(net_matches[0].replace(',', ''))
-        result["weights"]["net_landed_kg"] = float(net_matches[1].replace(',', ''))
-    
-    # Loss/Gain
-    loss_match = re.search(r'loss\s*:?\s*[-–]?\s*([\d,\.]+)\s*kg', ocr_text, re.IGNORECASE)
-    if loss_match:
-        loss_value = float(loss_match.group(1).replace(',', ''))
-        # Make it negative if not already indicated
-        if '-' not in loss_match.group(0) and '–' not in loss_match.group(0):
-            loss_value = -loss_value
-        result["weights"]["gain_loss_kg"] = loss_value
-    
-    # Percentage
-    percent_match = re.search(r'percentage\s*:?\s*[-–]?\s*([\d,\.]+)%', ocr_text, re.IGNORECASE)
-    if percent_match:
-        percent_value = float(percent_match.group(1).replace(',', ''))
-        if '-' not in percent_match.group(0) and '–' not in percent_match.group(0):
-            percent_value = -percent_value
-        result["weights"]["gain_loss_percent"] = percent_value
-    
-    return result
-
-@app.post("/ocr")
-async def ocr(file: UploadFile):
-    """
-    Enhanced OCR endpoint that returns structured data
-    """
-    logger.info(f"Received structured OCR request: {file.filename}")
-    
-    try:
-        file_data = await file.read()
-        ext = file.filename.lower()
-        
-        ocr_text = ""
-        
-        # Process PDF
-        if ext.endswith(".pdf"):
-            # Try native text extraction first
-            reader = PdfReader(io.BytesIO(file_data))
-            direct_text = "".join(page.extract_text() or "" for page in reader.pages)
-            
-            if direct_text.strip():
-                logger.info("Using native PDF text")
-                ocr_text = direct_text
-            else:
-                # Fallback to OCR
-                logger.info("Using OCR for scanned PDF")
-                images = convert_from_bytes(file_data)
-                for i, img in enumerate(images):
-                    logger.info(f"OCR page {i+1}/{len(images)}")
-                    ocr_text += pytesseract.image_to_string(img) + "\n"
-        else:
-            # Process image
-            img = Image.open(io.BytesIO(file_data))
-            ocr_text = pytesseract.image_to_string(img)
-        
-        # Parse structured data
-        structured_data = parse_cotton_report(ocr_text)
-        
-        return {
-            "success": True,
-            "ocr_text": ocr_text[:1000] + "..." if len(ocr_text) > 1000 else ocr_text,
-            "structured_data": structured_data,
-            "json": json.dumps(structured_data, indent=2, ensure_ascii=False)
-        }
-        
-    except Exception as e:
-        logger.error(f"Structured OCR failed: {e}", exc_info=True)
-        return {
-            "success": False,
-            "error": str(e),
-            "raw_text": "",
-            "structured_data": {}
-        }
-
 # =============================
 # 🧠 Smart OCR
 # =============================
@@ -611,7 +288,91 @@ async def ocr(file: UploadFile):
 #     except Exception as e:
 #         logger.error(f"OCR failed: {e}", exc_info=True)
 #         raise HTTPException(status_code=500, detail=str(e))
-
+@app.post("/ocr")
+async def ocr(file: UploadFile):
+    """
+    Extract the raw text of an uploaded PDF.
+
+    Strategy 1 reads the embedded text layer with pdfplumber. If that
+    yields no text (e.g. a scanned PDF) or raises, strategy 2 rasterizes
+    the pages with pdf2image and runs Tesseract OCR on every page.
+
+    Args:
+        file: The uploaded PDF document.
+
+    Returns:
+        ``{"ocr_text": <text>}`` on success, or
+        ``{"error": <message>, "success": False}`` when extraction fails.
+
+    Note:
+        The extraction calls are synchronous and block the event loop.
+    """
+    logger.info(f"Smart OCR request: {file.filename}")
+
+    try:
+        # Imported locally (like pdf2image below) so this handler does not
+        # rely on module-level imports.
+        import io
+
+        file_data = await file.read()
+
+        # Strategy 1: Try pdfplumber (best for digital PDFs)
+        try:
+            import pdfplumber
+
+            with pdfplumber.open(io.BytesIO(file_data)) as pdf:
+                text_parts = []
+                for page in pdf.pages:
+                    page_text = page.extract_text(x_tolerance=2, y_tolerance=2)
+                    if page_text:
+                        text_parts.append(page_text)
+
+            combined_text = "\n".join(text_parts)
+            # Short-circuit only when a text layer was actually found; an
+            # empty result means the PDF is scanned and still needs OCR.
+            if combined_text.strip():
+                logger.info(f"pdfplumber extracted {len(combined_text)} chars")
+                return {"ocr_text": combined_text}
+            logger.info("pdfplumber found no text layer")
+
+        except Exception as e:
+            # Best effort: any pdfplumber failure falls through to OCR.
+            logger.warning(f"pdfplumber attempt: {e}")
+
+        # Strategy 2: Fallback to OCR for scanned PDFs
+        logger.info("Falling back to OCR...")
+
+        import pytesseract
+        from pdf2image import convert_from_bytes
+
+        # Convert PDF to images
+        images = convert_from_bytes(file_data, dpi=200)
+
+        ocr_results = []
+        for page_no, img in enumerate(images, start=1):
+            logger.info(f"OCR page {page_no}/{len(images)}")
+            # --psm 6 makes Tesseract assume one uniform block of text;
+            # preserve_interword_spaces keeps the spacing between columns.
+            ocr_results.append(
+                pytesseract.image_to_string(
+                    img,
+                    config='--psm 6 -c preserve_interword_spaces=1'
+                )
+            )
+
+        # Return the same shape as the pdfplumber path.
+        # parse_cotton_report() no longer exists in this module, so the raw
+        # text is returned instead of parsed structured data.
+        return {"ocr_text": "\n".join(ocr_results)}
+
+    except Exception as e:
+        logger.error(f"Smart OCR failed: {e}", exc_info=True)
+        return {
+            "error": str(e),
+            "success": False
+        }
+
+
 # =============================
 # 🧱 Structure / Layout
 # =============================