from difflib import SequenceMatcher

import numpy as np
import pandas as pd

from src.application.image.image_detection import (
    detect_image_by_ai_model,
    detect_image_by_reverse_search,
    detect_image_from_news_image,
)
from src.application.text.entity import (
    apply_highlight,
    highlight_entities,
)
from src.application.text.helper import extract_equal_text
from src.application.text.model_detection import detect_text_by_ai_model, predict_generation_model
from src.application.text.preprocessing import split_into_paragraphs
from src.application.text.search_detection import (
    PARAPHRASE_THRESHOLD_MACHINE,
    find_paragraph_source,
)


class NewsVerification:
    def __init__(self):
        self.news_text = ""
        self.news_title = ""
        self.news_content = ""
        self.news_image = ""

        self.text_prediction_label: list[str] = ["UNKNOWN"]
        self.text_prediction_score: list[float] = [0.0]
        
        self.image_prediction_label: list[str] = ["UNKNOWN"]
        self.image_prediction_score: list[str] = [0.0]
        self.image_referent_url: list[str] = []
        
        self.news_prediction_label = ""
        self.news_prediction_score = -1

        # news' urls to find img
        self.found_img_url: list[str] = []
        
        # Analyzed results
        self.aligned_paragraphs_df: pd.DataFrame = pd.DataFrame(
            columns=[
                "input",
                "source",
                "label",
                "similarity",
                "paraphrase",
                "url",
                "group",
                "entities",
            ],
        )
        self.grouped_url_df: pd.DataFrame = pd.DataFrame()

        # For formatting ouput tables
        self.ordinary_user_table: list = []
        self.fact_checker_table: list = []
        self.governor_table: list = []

    def load_news(self, news_title, news_content, news_image):
        self.news_text = (news_title + "\n\n" + news_content).strip()
        self.news_title = news_title
        self.news_content = news_content
        self.news_image = news_image

    def determine_text_origin(self):
        self.find_text_source()
        
        # Group inout and source by url
        def concat_text(series):
            return ' '.join(series.astype(str).tolist()) #Handle mixed data types and NaNs
        
        self.grouped_url_df = self.aligned_paragraphs_df.groupby('url').agg(
            {
                'input': concat_text,
                'source': concat_text,
                }
            )
        self.grouped_url_df = self.grouped_url_df.reset_index()
        # Add new columns for label and score
        self.grouped_url_df["label"] = None
        self.grouped_url_df["score"] = None
        
        print(f"aligned_paragraphs_df:\n {self.aligned_paragraphs_df}")
        
        for index, row in self.grouped_url_df.iterrows():
            label, score = self.verify_text(row["url"])
            if label == "UNKNOWN":
                # Concatenate text from "input" in sentence_df
                text = " ".join(row["input"])

                # detect by baseline model
                label, score = detect_text_by_ai_model(text)
                
            self.grouped_url_df.at[index, "label"] = label
            self.grouped_url_df.at[index, "score"] = score

        # Overall label or score for the whole input text
        if len(self.grouped_url_df) > 0:
            # filter self.aligned_paragraphs_df["label"] if inclucind substring MACHINE
            machine_label = self.grouped_url_df[
                self.grouped_url_df["label"].str.contains("MACHINE", case=False, na=False)
            ]
            # machine_label = self.aligned_paragraphs_df[
            #     self.aligned_paragraphs_df["label"] == "MACHINE" 
            # ]
            if len(machine_label) > 0:
                label = " ".join(machine_label["label"].tolist())
                self.text_prediction_label[0] = label
                self.text_prediction_score[0] = machine_label["score"].mean()
            else:
                machine_label = self.aligned_paragraphs_df[
                    self.aligned_paragraphs_df["label"] == "HUMAN"
                ]
                self.text_prediction_label[0] = "HUMAN"
                self.text_prediction_score[0] = machine_label["score"].mean()
        else:  # no source found in the input text 
            print("No source found in the input text")
            text = " ".join(self.aligned_paragraphs_df["input"].tolist())
            # detect by baseline model
            label, score = detect_text_by_ai_model(text)      
            self.text_prediction_label[0] = label
            self.text_prediction_score[0] = score
        
    def find_text_source(self):
        """
        Determines the origin of the given text based on paraphrasing detection
            and human authorship analysis.

        Args:
            text: The input text to be analyzed.

        Returns:
            str: The predicted origin of the text:
                - "HUMAN": If the text is likely written by a human.
                - "MACHINE": If the text is likely generated by a machine.
        """
        print("CHECK TEXT:")
        print("\tFrom search engine:")
        # Classify by search engine
        input_sentences = split_into_paragraphs(self.news_text)

        # Setup df for input_sentences

        for _ in range(len(input_sentences)):
            self.aligned_paragraphs_df = pd.concat(
                [self.aligned_paragraphs_df, pd.DataFrame([{
                    "input": None,
                    "source": None,
                    "label": None,
                    "similarity": None,
                    "paraphrase": None,
                    "url": None,
                    "entities": None,
                }])],
                ignore_index=True,
            )

        # find a source for each paragraph
        for index, _ in enumerate(input_sentences):
            similarity = self.aligned_paragraphs_df.loc[index, "similarity"]
            if similarity is not None:
                if similarity > PARAPHRASE_THRESHOLD_MACHINE:
                    continue

            print(f"\n-------index = {index}-------")
            print(f"current_text = {input_sentences[index]}\n")

            self.aligned_paragraphs_df, img_urls = find_paragraph_source(
                input_sentences,
                index,
                self.aligned_paragraphs_df,
            )

            self.found_img_url.extend(img_urls)

        # determine if the whole source is from a news or not

    def verify_text(self, url):
        label = "UNKNOWN"
        score = 0
        # calculate the average similarity when the similary score in each row of sentences_df is higher than 0.8
        filtered_by_url = self.aligned_paragraphs_df[
            self.aligned_paragraphs_df["url"] == url
        ]
        filtered_by_similarity = filtered_by_url[
            filtered_by_url["similarity"] > 0.8
        ]
        if len(filtered_by_similarity) / len(self.aligned_paragraphs_df) > 0.5:
            # check if "MACHINE" is in self.aligned_sentences_df["label"]:
            contains_machine = filtered_by_similarity["label"].str.contains(
                "MACHINE", case=False, na=False
                ).any()
            if contains_machine:
                label = "MACHINE"
                machine_rows = filtered_by_similarity[
                    filtered_by_similarity["label"].str.contains(
                        "MACHINE",
                        case=False,
                        na=False)
                    ]
                generated_model, _ = predict_generation_model(self.news_text)
                label += f"<br>({generated_model})"
                score = machine_rows["similarity"].mean()
            else:
                label = "HUMAN"
                human_rows = filtered_by_similarity[
                    filtered_by_similarity["label"].str.contains(
                        "HUMAN",
                        case=False,
                        na=False)
                    ]
                score = human_rows["similarity"].mean()
        
        return label, score
        

    def determine_image_origin(self):
        print("CHECK IMAGE:")
        if self.news_image is None:
            self.image_prediction_label = "UNKNOWN"
            self.image_prediction_score = 0.0
            self.image_referent_url = None
            return

        matched_url, similarity = detect_image_from_news_image(
            self.news_image,
            self.found_img_url,
        )
        if matched_url is not None:
            print(f"matched image: {matched_url}\nsimilarity: {similarity}\n")
            self.image_prediction_label = "HUMAN"
            self.image_prediction_score = similarity
            self.image_referent_url = matched_url
            return

        matched_url, similarity = detect_image_by_reverse_search(
            self.news_image,
        )
        if matched_url is not None:
            print(f"matched image: {matched_url}\tScore: {similarity}%\n")
            self.image_prediction_label = "HUMAN"
            self.image_prediction_score = similarity
            self.image_referent_url = matched_url
            return

        detected_label, score = detect_image_by_ai_model(self.news_image)
        if detected_label:
            print(f"detected_label: {detected_label} ({score})")
            self.image_prediction_label = detected_label
            self.image_prediction_score = score
            self.image_referent_url = None
            return

        self.image_prediction_label = "UNKNOWN"
        self.image_prediction_score = 50
        self.image_referent_url = None

    def generate_analysis_report(self):
        if self.news_text != "":
            self.determine_text_origin()
        if self.news_image != "":
            self.determine_image_origin()

    def analyze_details(self):
        self.handle_entities()    
        ordinary_user_table = self.create_ordinary_user_table()
        fact_checker_table = self.create_fact_checker_table()
        governor_table = self.create_governor_table()

        return ordinary_user_table, fact_checker_table, governor_table
    
    def handle_entities(self):        
        entities_with_colors = []
        for index, row in self.grouped_url_df.iterrows():
            # Get entity-words (in pair) with colors
            entities_with_colors = highlight_entities(
                row["input"],
                row["source"],
            )

            #self.grouped_url_df.at[index, "entities"] = entities_with_colors  # must use at
            
            for index, paragraph in self.aligned_paragraphs_df.iterrows():
                if paragraph["url"] == row["url"]:
                    self.aligned_paragraphs_df.at[index, "entities"] = entities_with_colors # must use at


    def get_text_urls(self):
        return set(self.text_referent_url)

    def compare_sentences(self, sentence_1, sentence_2, position, color):
        """
        Compares two sentences and identifies common phrases,
            outputting their start and end positions.

        """

        if not sentence_1 or not sentence_2:  # Handle empty strings
            return []

        s = SequenceMatcher(None, sentence_1, sentence_2)
        common_phrases = []

        for block in s.get_matching_blocks():
            if block.size > 0:  # Ignore zero-length matches
                start_1 = block.a
                end_1 = block.a + block.size
                start_2 = block.b
                end_2 = block.b + block.size

                phrase = sentence_1[
                    start_1:end_1
                ]  # Or sentence_2[start_2:end_2], they are the same

                common_phrases.append(
                    {
                        "phrase": phrase,
                        "start_1": start_1 + position,
                        "end_1": end_1 + position,
                        "start_2": start_2,
                        "end_2": end_2,
                        "color": color,
                    },
                )
        position += len(sentence_1)
        return common_phrases, position

    def create_fact_checker_table(self):
        rows = []
        max_length = 30  # TODO: put this in configuration
        rows.append(self.format_image_fact_checker_row(max_length))

        for _, row in self.aligned_paragraphs_df.iterrows():
            if row["input"] == None:
                continue
            
            if row["source"] == None:
                equal_idx_1 = equal_idx_2 = []
            
            else: # Get index of equal phrases in input and source sentences
                equal_idx_1, equal_idx_2 = extract_equal_text(
                    row["input"],
                    row["source"],
                )

            self.fact_checker_table.append(
                [
                    row,
                    equal_idx_1,
                    equal_idx_2,
                    row["entities"],
                    row["url"]
                ],
            )
        
        previous_url = None
        span_row = 1
        for index, row in enumerate(self.fact_checker_table): 
            current_url = row[4]
            last_url_row = False
            
            # First row or URL change
            if index == 0 or current_url != previous_url:
                first_url_row = True
                previous_url = current_url
                # Increase counter  "span_row" when the next url is the same
                while index + span_row < len(self.fact_checker_table) \
                    and self.fact_checker_table[index + span_row][4] == current_url:
                    span_row += 1
                
            else:
                first_url_row = False
                span_row -= 1
                
            if span_row == 1:
                last_url_row = True
                
            formatted_row = self.format_text_fact_checker_row(row, first_url_row, last_url_row, span_row, max_length)
            rows.append(formatted_row)

        table = "\n".join(rows)
        return f"""
<h5>Comparison between input news and source news:</h5>
<table border="1" style="width:100%; text-align:left;">
<col style="width: 170px;">
<col style="width: 170px;">
<col style="width: 30px;">
<col style="width: 75px;">
    <thead>
        <tr>
            <th>Input news</th>
            <th>Source (URL in Originality)</th>
            <th>Forensic</th>
            <th>Originality</th>
        </tr>
    </thead>
    <tbody>
        {table}
    </tbody>
</table>

<style>
    """

    def format_text_fact_checker_row(
        self,
        row,
        first_url_row=True,
        last_url_row=True,
        span_row=1,
        max_length=30,
    ):
        entity_count = 0
        if row[0]["input"] is None:
            return ""
        if row[0]["source"] is not None:  # source is not empty
            if row[3] is not None:
                # highlight entities
                input_sentence, highlight_idx_input = apply_highlight(
                    row[0]["input"],
                    row[3],
                    "input",
                )
                source_sentence, highlight_idx_source = apply_highlight(
                    row[0]["source"],
                    row[3],
                    "source",
                )
            else:
                input_sentence = row[0]["input"]
                source_sentence = row[0]["source"]
                highlight_idx_input = []
                highlight_idx_source = []
                
            if row[3] is not None:
                entity_count = len(row[3])

            # Color overlapping words
            input_sentence = self.color_text(
                input_sentence,
                row[1],
                highlight_idx_input,
            )  # text, index of highlight words
            source_sentence = self.color_text(
                source_sentence,
                row[2],
                highlight_idx_source,
            )  # text, index of highlight words

            # Replace _ to get correct formatting
            # Original one having _ for correct word counting 
            input_sentence = input_sentence.replace(
                "span_style",
                "span style",
            ).replace("1px_4px", "1px 4px")
            source_sentence = source_sentence.replace(
                "span_style",
                "span style",
            ).replace("1px_4px", "1px 4px")
        else:
            input_sentence = row[0]["input"]
            source_sentence = row[0]["source"]

        url = row[0]["url"]
        # Displayed label and score by url
        filterby_url = self.grouped_url_df[
            self.grouped_url_df["url"] == url
        ]
        if len(filterby_url) > 0:
            label = filterby_url["label"].values[0]
            score = filterby_url["score"].values[0]
        else:        
            label = self.text_prediction_label[0]
            score = self.text_prediction_score[0]

        # Format displayed url
        
        short_url = self.shorten_url(url, max_length)
        source_text_url = f"""<a href="{url}">{short_url}</a>"""

        # Format displayed entity count
        entity_count_text = self.get_entity_count_text(entity_count)
        
        border_top = "border-top: 1px solid transparent;"
        border_bottom = "border-bottom: 1px solid transparent;"
        if first_url_row is True:
            # First & Last the group: no transparent
            if last_url_row is True:
                return f"""
<tr>
    <td>{input_sentence}</td>
    <td>{source_sentence}</td>
    <td rowspan="{span_row}">{label}<br>
    ({score * 100:.2f}%)<br><br>
    {entity_count_text}</td>
    <td rowspan="{span_row}">{source_text_url}</td>
</tr>
"""
            # First row of the group: transparent bottom border
            return f"""
<tr>
    <td style="{border_bottom}";>{input_sentence}</td>
    <td style="{border_bottom}";>{source_sentence}</td>
    <td rowspan="{span_row}">{label}<br>
    ({score * 100:.2f}%)<br><br>
    {entity_count_text}</td>
    <td rowspan="{span_row}">{source_text_url}</td>
</tr>
"""
        else:
            if last_url_row is True:
                # NOT First row, Last row: transparent top border
                return f"""
<tr>
    <td style="{border_top}";>{input_sentence}</td>
    <td style="{border_top}";>{source_sentence}</td>
</tr>
"""
            else:
                # NOT First & NOT Last row: transparent top & bottom borders
                return f"""
<tr>
    <td style="{border_top} {border_bottom}";>{input_sentence}</td>
    <td style="{border_top} {border_bottom}";>{source_sentence}</td>
</tr>
"""

    def format_image_fact_checker_row(self, max_length=30):

        if (
            self.image_referent_url is not None
            or self.image_referent_url != ""
        ):
            source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">"""  # noqa: E501
            short_url = self.shorten_url(self.image_referent_url, max_length)
            source_image_url = (
                f"""<a href="{self.image_referent_url}">{short_url}</a>"""
            )
        else:
            source_image = "Image not found"
            source_image_url = ""

        return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""  # noqa: E501

    def create_ordinary_user_table(self):
        rows = []
        max_length = 30  # TODO: put this in configuration
        rows.append(self.format_image_ordinary_user_row(max_length))
        rows.append(self.format_text_ordinary_user_row(max_length))
        table = "\n".join(rows)

        return f"""
<h5>Comparison between input news and source news:</h5>
<table border="1" style="width:100%; text-align:left;">
<col style="width: 170px;">
<col style="width: 30px;">
<col style="width: 75px;">
    <thead>
        <tr>
            <th>Input news</th>
            <th>Forensic</th>
            <th>Originality</th>
        </tr>
    </thead>
    <tbody>
        {table}
    </tbody>
</table>

<style>
    """

    def format_text_ordinary_user_row(self, max_length=30):
        input_sentences = ""
        source_text_urls = ""
        urls = []
        for _, row in self.aligned_paragraphs_df.iterrows():
            if row["input"] == None:
                continue
            input_sentences += row["input"] + "<br><br>"
            url = row["url"]
            if url not in urls:
                urls.append(url)
                short_url = self.shorten_url(url, max_length)
                source_text_urls += f"""<a href="{url}">{short_url}</a><br>"""

        return f"""
                <tr>
                    <td>{input_sentences}</td>
                    <td>{self.text_prediction_label[0]}<br>
                    ({self.text_prediction_score[0] * 100:.2f}%)</td>
                    <td>{source_text_urls}</td>
                </tr>
                """

    def format_image_ordinary_user_row(self, max_length=30):

        if (
            self.image_referent_url is not None
            or self.image_referent_url != ""
        ):
            short_url = self.shorten_url(self.image_referent_url, max_length)
            source_image_url = (
                f"""<a href="{self.image_referent_url}">{short_url}</a>"""
            )
        else:
            # source_image = "Image not found"
            source_image_url = ""

        return f"""<tr><td>input image</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""  # noqa: E501

    def create_governor_table(self):
        rows = []
        max_length = 30  # TODO: put this in configuration
        rows.append(self.format_image_governor_row(max_length))

        for _, row in self.aligned_paragraphs_df.iterrows():
            if row["input"] == None:
                continue
                
            if row["source"] == None:
                equal_idx_1 = equal_idx_2 = []
            
            else:    
                # Get index of equal phrases in input and source sentences
                equal_idx_1, equal_idx_2 = extract_equal_text(
                    row["input"],
                    row["source"],
                )

            self.governor_table.append(
                [
                    row,
                    equal_idx_1,
                    equal_idx_2,
                    row["entities"],
                ],
            )

        formatted_row = self.format_text_governor_row(max_length)
        rows.append(formatted_row)

        table = "\n".join(rows)
        return f"""
<h5>Comparison between input news and source news:</h5>
<table border="1" style="width:100%; text-align:left;">
<col style="width: 170px;">
<col style="width: 170px;">
<col style="width: 30px;">
<col style="width: 75px;">
    <thead>
        <tr>
            <th>Input news</th>
            <th>Source (URL in Originality)</th>
            <th>Forensic</th>
            <th>Originality</th>
        </tr>
    </thead>
    <tbody>
        {table}
    </tbody>
</table>

<style>
        """

    def format_text_governor_row(self, max_length=30):
        input_sentences = ""
        source_sentences = ""
        source_text_urls = ""
        urls = []
        sentence_count = 0
        entity_count = [0, 0]  # to get index of [-2]
        for row in self.governor_table:
            if row[0]["input"] is None:
                continue

            if row[0]["source"] is not None and row[3] is not None:  # source is not empty
                # highlight entities
                input_sentence, highlight_idx_input = apply_highlight(
                    row[0]["input"],
                    row[3],  # entities_with_colors
                    "input",  # key
                    entity_count[-2],  # since the last one is for current counting
                )
                source_sentence, highlight_idx_source = apply_highlight(
                    row[0]["source"],
                    row[3],  # entities_with_colors
                    "source",  # key
                    entity_count[-2],  # since the last one is for current counting
                )

                # Color overlapping words
                input_sentence = self.color_text(
                    input_sentence,
                    row[1],
                    highlight_idx_input,
                )  # text, index of highlight words
                source_sentence = self.color_text(
                    source_sentence,
                    row[2],
                    highlight_idx_source,
                )  # text, index of highlight words

                input_sentence = input_sentence.replace(
                    "span_style",
                    "span style",
                ).replace("1px_4px", "1px 4px")
                source_sentence = source_sentence.replace(
                    "span_style",
                    "span style",
                ).replace("1px_4px", "1px 4px")

            else:
                if row[0]["source"] is None:
                    source_sentence = ""
                else:
                    source_sentence = row[0]["source"]
                input_sentence = row[0]["input"]
                

            # convert score to HUMAN-based score:
            input_sentences += input_sentence + "<br><br>"
            source_sentences += source_sentence + "<br><br>"
            
            url = row[0]["url"]
            if url not in urls:
                urls.append(url)
                short_url = self.shorten_url(url, max_length)
                source_text_urls += f"""<a href="{url}">{short_url}</a><br>"""
                sentence_count += 1
                if row[3] is not None:
                    entity_count.append(len(row[3]))
        
        entity_count_text = self.get_entity_count_text(sum(entity_count))

        return f"""
<tr>
    <td>{input_sentences}</td>
    <td>{source_sentences}</td>
    <td>{self.text_prediction_label[0]}<br>
        ({self.text_prediction_score[0] * 100:.2f}%)<br><br>
        {entity_count_text}</td>
    <td>{source_text_urls}</td>
</tr>
                """

    def format_image_governor_row(self, max_length=30):
        if (
            self.image_referent_url is not None
            or self.image_referent_url != ""
        ):
            source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">"""  # noqa: E501
            short_url = self.shorten_url(self.image_referent_url, max_length)
            source_image_url = (
                f"""<a href="{self.image_referent_url}">{short_url}</a>"""
            )
        else:
            source_image = "Image not found"
            source_image_url = ""

        return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""  # noqa: E501

    def get_entity_count_text(self, entity_count):
        if entity_count <= 0:
            entity_count_text = ""
        elif entity_count == 1:
            entity_count_text = "with 1 altered entity"
        else:
            entity_count_text = "with altered entities"
        return entity_count_text

    def shorten_url(self, url, max_length=30):
        if url is None:
            return ""

        if len(url) > max_length:
            short_url = url[:max_length] + "..."
        else:
            short_url = url
        return short_url

    def color_text(self, text, colored_idx, highlighted_idx):
        paragraph = ""
        words = text.split()

        starts, ends = self.extract_starts_ends(colored_idx)
        starts, ends = self.filter_indices(starts, ends, highlighted_idx)
        
        previous_end = 0
        for start, end in zip(starts, ends):
            paragraph += " ".join(words[previous_end:start])

            equal_words = " ".join(words[start:end])
            paragraph += f" <span style='color:#00FF00;'>{equal_words}</span> "

            previous_end = end

        paragraph += " ".join(words[previous_end:])

        return paragraph

    def extract_starts_ends(self, colored_idx):
        starts = []
        ends = []
        for index in colored_idx:
            starts.append(index["start"])
            ends.append(index["end"])
        return starts, ends

    def filter_indices(self, starts, ends, ignore_indices):
        """
        Filters start and end indices to exclude any indices present in the
            ignore_indices list.

        Args:
            starts: A list of starting indices.
            ends: A list of ending indices. Must be the same length as starts.
            ignore_indices: A list of indices to exclude.

        Returns:
            A tuple of two lists: filtered_starts and filtered_ends.
            Returns empty lists if the input is invalid
                or if all ranges are filtered out.
            Prints error messages for invalid input.

        Examples:
            starts = [0, 5, 10]
            ends = [3, 7, 12]
            ignore_indices = [1, 2, 11, 17]

            # Output:
                starts = [0, 3, 5, 10, 12]
                ends = [0, 3, 7, 10, 12]

        """

        if len(starts) != len(ends):
            print(
                "Error: The 'starts' and 'ends' lists must have the same length.",  # noqa: E501
            )
            return [], []

        filtered_starts = []
        filtered_ends = []

        for i in range(len(starts)):
            start = starts[i]
            end = ends[i]

            if end < start:
                print(
                    f"Error: End index {end} is less than start index {start} at position {i}.",  # noqa: E501
                )
                return [], []

            start_end = list(range(start, end + 1, 1))
            start_end = list(set(start_end) - set(ignore_indices))
            new_start, new_end = self.extract_sequences(start_end)
            filtered_starts.extend(new_start)
            filtered_ends.extend(new_end)

        return filtered_starts, filtered_ends

    def extract_sequences(self, numbers):
        if len(numbers) == 1:
            return [numbers[0]], [numbers[0]]

        numbers.sort()
        starts = []
        ends = []
        for i, number in enumerate(numbers):
            if i == 0:
                start = number
                end = number
                continue

            if number - 1 == numbers[i - 1]:
                end = number
            else:
                starts.append(start)
                ends.append(end)
                start = number
                end = number

            if i == len(numbers) - 1:
                starts.append(start)
                ends.append(end)

        return starts, ends