import os
import json
import re
import yaml
import markdown
from datetime import datetime, timezone
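
# Overview: walk the current directory tree, parse every Markdown file's YAML
# front matter (title, date, tags), render each body to HTML, and emit a Ghost
# 5.x import file (ghost_import.json).
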
def parse_markdown(filepath):
    """Parse one Markdown file with YAML front matter into a post dict."""
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()
    # Split the document into its front matter block and the Markdown body.
    match = re.match(r'^---\n(.*?)\n---\n(.*)', text, re.DOTALL)
    if not match:
        print(f"⚠️ Skipping file: {filepath}, no front matter found")
        return None
    front_matter_text, content_part = match.groups()
    try:
        # `or {}` guards against an empty front matter block, which
        # safe_load() returns as None.
        front_matter = yaml.safe_load(front_matter_text) or {}
    except Exception as e:
        print(f"⚠️ YAML parse error: {filepath} -> {e}")
        return None
    title = front_matter.get('title')
    raw_date = front_matter.get('date')
    tags = front_matter.get('tags', [])
    # A bare string such as `tags: python` would otherwise be iterated
    # character by character downstream.
    if isinstance(tags, str):
        tags = [tags]
    if not title or not raw_date or not content_part.strip():
        print(f"⚠️ Incomplete data, skipping file: {filepath} -> title: {title}, date: {raw_date}, content_length: {len(content_part.strip())}")
        return None
    try:
        if isinstance(raw_date, datetime):
            # PyYAML already parses unquoted ISO-style dates into datetimes.
            dt = raw_date
        elif isinstance(raw_date, str):
            try:
                dt = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
            except ValueError:
                try:
                    dt = datetime.strptime(raw_date, "%Y-%m-%d %H:%M:%S")
                except ValueError:
                    dt = datetime.strptime(raw_date, "%Y-%m-%dT%H:%M:%S%z")
        else:
            print(f"⚠️ Unrecognized date format, skipping: {filepath} -> date: {raw_date}")
            return None
    except Exception as e:
        print(f"⚠️ Date parsing failed: {filepath} -> {e}")
        return None
    # Normalize timezone-aware datetimes to UTC so the trailing "Z" below
    # is accurate.
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    date_str = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
    html_content = markdown.markdown(content_part.strip(), extensions=['extra', 'codehilite', 'tables'])
    return {
        'title': title,
        'slug': os.path.splitext(os.path.basename(filepath))[0],
        'html': html_content,
        'created_at': date_str,
        'updated_at': date_str,
        'tags': tags
    }

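
# For reference, parse_markdown() expects files shaped like the following
# (the values here are purely illustrative):
#
#   ---
#   title: My First Post
#   date: 2023-05-01 12:00:00
#   tags:
#     - python
#     - ghost
#   ---
#
#   Post body in **Markdown**...
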
def walk_md_files(base_dir):
    """Yield the path of every .md file under base_dir, recursively."""
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith('.md'):
                yield os.path.join(root, file)

def main():
    base_dir = './'
    print(f"Scanning directory: {base_dir}")
    posts = []
    tags_set = set()
    for filepath in walk_md_files(base_dir):
        print(f"📄 Processing file: {filepath}")
        post = parse_markdown(filepath)
        if post:
            # Clean each post's tags, collecting the global tag set as we go.
            post_tags = []
            for tag_name in post['tags']:
                tag_str = str(tag_name).strip()
                if tag_str:
                    tags_set.add(tag_str)
                    post_tags.append(tag_str)
            post['tags'] = post_tags
            posts.append(post)
    # Skeleton of a Ghost 5.x import document: posts, tags, the posts_tags
    # join table, and a single author that every post is assigned to.
    ghost_data = {
        "meta": {
            "exported_on": int(datetime.now().timestamp() * 1000),
            "version": "5.0.0"
        },
        "data": {
            "posts": [],
            "tags": [],
            "posts_tags": [],
            "users": [
                {
                    "id": 1,
                    "name": "admin",
                    "slug": "admin",
                    "email": "admin@example.com"
                }
            ]
        }
    }
    for i, post in enumerate(posts, start=1):
        ghost_data["data"]["posts"].append({
            "id": i,
            "title": post["title"],
            "slug": post["slug"],
            "html": post["html"],
            "status": "published",
            "created_at": post["created_at"],
            "updated_at": post["updated_at"],
            "published_at": post["created_at"],
            "author_id": 1
        })
    # Sort so tag IDs are deterministic across runs (sets are unordered).
    tag_id_map = {tag: idx + 1 for idx, tag in enumerate(sorted(tags_set))}
    for tag, tag_id in tag_id_map.items():
        ghost_data["data"]["tags"].append({
            "id": tag_id,
            "name": tag,
            "slug": tag.lower().replace(" ", "-")
        })
    # Link each post to its tags through posts_tags join records.
    for post_id, post in enumerate(posts, start=1):
        for tag in post["tags"]:
            ghost_data["data"]["posts_tags"].append({
                "post_id": post_id,
                "tag_id": tag_id_map[tag]
            })
    with open("ghost_import.json", "w", encoding="utf-8") as f:
        json.dump(ghost_data, f, ensure_ascii=False, indent=2)
    print("✅ Generated ghost_import.json")


if __name__ == "__main__":
    main()
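
# Usage: run this script from the root of your Markdown directory (the file
# name md_to_ghost.py below is an assumption, not fixed by the script):
#
#   python md_to_ghost.py
#
# then load the generated ghost_import.json through the Ghost admin importer
# (under Settings -> Labs -> Import content in Ghost 5.x).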