From dcf51876a571fd6542867d351b6c5a8afebebb1a Mon Sep 17 00:00:00 2001
From: Karim shoair <D4Vinci@users.noreply.github.com>
Date: Wed, 11 Dec 2024 20:18:05 +0200
Subject: [PATCH] fix: Adaptor.body returns raw HTML without processing

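When the original input is available, `Adaptor.body` returns it (whitespace-stripped, but not re-serialized by the parser); otherwise it falls back to `Adaptor.html_content`.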
---
 scrapling/parser.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/scrapling/parser.py b/scrapling/parser.py
index 7171080..123fc6b 100644
--- a/scrapling/parser.py
+++ b/scrapling/parser.py
@@ -25,7 +25,7 @@ class Adaptor(SelectorsGeneration):
     __slots__ = (
         'url', 'encoding', '__auto_match_enabled', '_root', '_storage', '__debug',
         '__keep_comments', '__huge_tree_enabled', '__attributes', '__text', '__tag',
-        '__keep_cdata',
+        '__keep_cdata', '__raw_body'
     )
 
     def __init__(
@@ -73,17 +73,20 @@ def __init__(
             raise ValueError("Adaptor class needs text, body, or root arguments to work")
 
         self.__text = None
+        self.__raw_body = ''
         if root is None:
             if text is None:
                 if not body or not isinstance(body, bytes):
                     raise TypeError(f"body argument must be valid and of type bytes, got {body.__class__}")
 
                 body = body.replace(b"\x00", b"").strip()
+                self.__raw_body = body.replace(b"\x00", b"").strip().decode()
             else:
                 if not isinstance(text, str):
                     raise TypeError(f"text argument must be of type str, got {text.__class__}")
 
                 body = text.strip().replace("\x00", "").encode(encoding) or b"<html/>"
+                self.__raw_body = text.strip()
 
             # https://lxml.de/api/lxml.etree.HTMLParser-class.html
             parser = html.HTMLParser(
@@ -264,7 +267,10 @@ def html_content(self) -> str:
         """Return the inner html code of the element"""
         return etree.tostring(self._root, encoding='unicode', method='html', with_tail=False)
 
-    body = html_content
+    @property
+    def body(self) -> str:
+        """Return the stored raw HTML input (stripped, not re-serialized) when one was kept at construction; otherwise fall back to `Adaptor.html_content`"""
+        return self.__raw_body or self.html_content
 
     def prettify(self) -> str:
         """Return a prettified version of the element's inner html-code"""