Spaces:
Sleeping
Sleeping
add: preprocess data str
Browse files
src/collect/collect_data.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def collect_data(
    input_path: str = "../data/one_book.txt",
    output_path: str = "../data/articles.csv",
) -> pd.DataFrame:
    """Build a one-row article table from a hand-written text file.

    The first line of the input file is used as the book title. All
    remaining lines are concatenated (without separators) into the body,
    and a newline is inserted before every ``##`` marker. The resulting
    single-row frame is written to ``output_path`` as CSV and returned.

    Args:
        input_path: Text file whose first line is the title and whose
            remaining lines form the body. Relative paths are resolved
            against the current working directory.
        output_path: Destination CSV file (written without the index).

    Returns:
        A one-row DataFrame with the columns ``book_title`` and ``body``.

    Raises:
        ValueError: If the input file contains no lines at all.
    """
    # For now the data is prepared by hand in a plain text file.
    # Decode explicitly as UTF-8: the text may contain non-ASCII (e.g.
    # Japanese) characters and the platform default encoding is not UTF-8
    # everywhere.
    with open(input_path, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    if not lines:
        raise ValueError(f"{input_path} is empty; expected a title line")

    book_title, *body_lines = lines
    # splitlines() has already removed every line break, so the only
    # newlines in the body are the ones inserted here before each "##".
    body = "".join(body_lines).replace("##", "\n##")

    df = pd.DataFrame({"book_title": book_title, "body": body}, index=[0])
    df.to_csv(output_path, index=False)
    return df
|
14 |
+
|
15 |
+
# Run the collection step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    collect_data()
|
src/data_dummy/dummy_articles.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
book_title,body
|
2 |
+
坊ちゃん,ここに感想記事の本文を貼り付ける
|