<?php
namespace App\Parser;

/**
 * No-Composer HTML parser for Salla category/listing pages.
 *
 * Extracts one record per <custom-salla-product-card> element found in the
 * markup, using only PHP's bundled DOM/libxml extension.
 */
class CategoryParser
{
    /**
     * Parses listing HTML into a list of product records.
     *
     * Cards without a usable link are skipped. Every other field is optional
     * and is null when the matching node is missing or unusable.
     *
     * @param string $html    Raw page markup. Empty or whitespace-only input yields an empty list.
     * @param string $baseUrl Optional prefix for root-relative links ("/path"); its trailing
     *                        slash is stripped before joining. Its scheme is also applied to
     *                        protocol-relative links ("//host/path"). Presumably the site
     *                        origin - confirm against callers.
     *
     * @return array<int, array{
     *     source_product_id: ?string,
     *     source_url: string,
     *     name: ?string,
     *     price: ?string,
     *     main_image_url: ?string
     * }>
     */
    public function parse(string $html, string $baseUrl = ''): array
    {
        $dom = $this->loadDocument($html);
        if ($dom === null) {
            return [];
        }

        $xp = new \DOMXPath($dom);
        $cards = $xp->query('//custom-salla-product-card');
        if (!$cards) {
            return [];
        }

        $items = [];
        foreach ($cards as $card) {
            if (!$card instanceof \DOMElement) {
                continue;
            }

            // A card we cannot link back to is useless downstream, so drop it.
            $url = $this->extractUrl($xp, $card, $baseUrl);
            if ($url === null) {
                continue;
            }

            // Compare against '' explicitly so an id of "0" is kept.
            $id = $card->getAttribute('id');

            $items[] = [
                'source_product_id' => $id !== '' ? $id : null,
                'source_url' => $url,
                'name' => $this->extractName($xp, $card),
                'price' => $this->extractPrice($xp, $card),
                'main_image_url' => $this->extractImage($xp, $card),
            ];
        }

        return $items;
    }

    /**
     * Loads markup into a DOMDocument, or returns null when there is nothing to parse.
     */
    private function loadDocument(string $html): ?\DOMDocument
    {
        // DOMDocument::loadHTML() throws a ValueError on an empty string in PHP 8.
        if (trim($html) === '') {
            return null;
        }

        // Without a charset hint libxml decodes the bytes as a single-byte Latin
        // encoding, which garbles Arabic text. Only force UTF-8 when the input
        // really is valid UTF-8, so pages in another declared charset still
        // go through libxml's own detection.
        if (preg_match('//u', $html) === 1) {
            $html = '<?xml encoding="UTF-8">' . $html;
        }

        $dom = new \DOMDocument();

        // Real-world HTML is rarely valid; collect parse warnings silently, then
        // restore the caller's libxml error mode because it is process-wide state.
        $previous = \libxml_use_internal_errors(true);
        try {
            $loaded = $dom->loadHTML($html);
        } finally {
            \libxml_clear_errors();
            \libxml_use_internal_errors($previous);
        }

        return $loaded ? $dom : null;
    }

    /**
     * Returns the first link of the card resolved against $baseUrl, or null if the card has none.
     */
    private function extractUrl(\DOMXPath $xp, \DOMElement $card, string $baseUrl): ?string
    {
        $links = $xp->query('.//a[@href]', $card);
        if (!$links || $links->length === 0) {
            return null;
        }

        $first = $links->item(0);
        if (!$first instanceof \DOMElement) {
            return null;
        }

        $href = trim($first->getAttribute('href'));
        if ($href === '') {
            return null;
        }

        return $this->resolveUrl($href, $baseUrl);
    }

    /**
     * Makes root-relative and protocol-relative URLs absolute; other URLs are returned unchanged.
     */
    private function resolveUrl(string $url, string $baseUrl): string
    {
        if ($baseUrl === '') {
            return $url;
        }

        // "//host/path" already names its host; it only lacks a scheme. This must be
        // checked before the single-slash case, which would otherwise glue it onto the base.
        if (str_starts_with($url, '//')) {
            $scheme = parse_url($baseUrl, PHP_URL_SCHEME);

            return is_string($scheme) && $scheme !== '' ? $scheme . ':' . $url : $url;
        }

        if (str_starts_with($url, '/')) {
            return rtrim($baseUrl, '/') . $url;
        }

        return $url;
    }

    /**
     * Returns the trimmed text of the card's title link, or null when no title link exists.
     */
    private function extractName(\DOMXPath $xp, \DOMElement $card): ?string
    {
        $nodes = $xp->query('.//h3[contains(@class,"s-product-card-content-title")]//a', $card);
        if (!$nodes || $nodes->length === 0) {
            return null;
        }

        return trim($nodes->item(0)->textContent);
    }

    /**
     * Returns the first decimal number in the card's price heading as a string, or null.
     */
    private function extractPrice(\DOMXPath $xp, \DOMElement $card): ?string
    {
        $nodes = $xp->query('.//h4[contains(@class,"s-product-card-price")]', $card);
        if (!$nodes || $nodes->length === 0) {
            return null;
        }

        // Strip thousands separators and currency markers so "1,299.50" matches as one number.
        $raw = trim($nodes->item(0)->textContent);
        $raw = str_replace([',', 'ر.س', 'SAR'], '', $raw);

        return preg_match('/([0-9]+(\.[0-9]+)?)/', $raw, $m) === 1 ? $m[1] : null;
    }

    /**
     * Returns the absolute http(s) URL of the card's first image, or null.
     */
    private function extractImage(\DOMXPath $xp, \DOMElement $card): ?string
    {
        $nodes = $xp->query('.//img', $card);
        if (!$nodes || $nodes->length === 0) {
            return null;
        }

        $img = $nodes->item(0);
        if (!$img instanceof \DOMElement) {
            return null;
        }

        // data-src is read first; src is used only when data-src is empty or "0".
        $candidate = $img->getAttribute('data-src') ?: $img->getAttribute('src');

        return preg_match('~^https?://~', $candidate) === 1 ? $candidate : null;
    }
}
