Spaces:

GotoUsuke
/

GraphRag

Running

App Files Files Community

GraphRag / GraphRAG-Ollama-UI /graphrag /tests /unit /indexing /verbs /text /test_split.py

GotoUsuke

Upload folder using huggingface_hub

db4a26f verified 7 months ago

raw

history blame contribute delete

1.89 kB

	# Copyright (c) 2024 Microsoft Corporation.
	# Licensed under the MIT License
	import unittest

	import pandas as pd
	import pytest

	from graphrag.index.verbs.text.split import text_split_df


	class TestTextSplit(unittest.TestCase):
	    """Unit tests for ``text_split_df``.

	    Every test builds a DataFrame with a single ``"in"`` column, calls
	    ``text_split_df(frame, "in", "out", ",")`` and inspects the ``"out"``
	    value of each resulting record.

	    The fixture is named ``frame`` rather than ``input`` so the builtin
	    ``input`` is not shadowed inside the test bodies.
	    """

	    def test_empty_string(self):
	        """An empty string is expected to produce an empty list."""
	        frame = pd.DataFrame([{"in": ""}])
	        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	        assert len(result) == 1
	        assert result[0]["out"] == []

	    def test_string_without_seperator(self):
	        """A string without "," is expected back as a one-element list."""
	        frame = pd.DataFrame([{"in": "test_string_without_seperator"}])
	        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	        assert len(result) == 1
	        assert result[0]["out"] == ["test_string_without_seperator"]

	    def test_string_with_seperator(self):
	        """A string containing "," is expected to be split at that character."""
	        frame = pd.DataFrame([{"in": "test_1,test_2"}])
	        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	        assert len(result) == 1
	        assert result[0]["out"] == ["test_1", "test_2"]

	    def test_row_with_list_as_column(self):
	        """A cell that already holds a list is expected back with equal content."""
	        frame = pd.DataFrame([{"in": ["test_1", "test_2"]}])
	        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	        assert len(result) == 1
	        assert result[0]["out"] == ["test_1", "test_2"]

	    def test_non_string_column_throws_error(self):
	        """An integer cell is expected to raise ``TypeError``."""
	        frame = pd.DataFrame([{"in": 5}])
	        with pytest.raises(TypeError):
	            text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	    def test_more_than_one_row_returns_correctly(self):
	        """Each row is expected to be split on its own, in input order."""
	        frame = pd.DataFrame(
	            [
	                {"in": "row_1_1,row_1_2"},
	                {"in": "row_2_1,row_2_2"},
	            ]
	        )
	        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

	        assert len(result) == 2
	        assert result[0]["out"] == ["row_1_1", "row_1_2"]
	        assert result[1]["out"] == ["row_2_1", "row_2_2"]