misc
Loading...
Searching...
No Matches
write_publication_html.py
1import json
2import tomllib
3
4def read_config(path):
5 with open(path, 'rb') as f:
6 config = tomllib.load(f)
7 return config
8
def extract_structure(jsonl_file_path):
    """Return the set of top-level keys used by the objects in a JSONL file.

    Each non-blank line of the UTF-8 file is parsed as one JSON object and
    its keys are added to the result.

    Args:
        jsonl_file_path: Path of the JSON Lines file to scan.

    Returns:
        A set with every key that appears on at least one line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    structure = set()  # a set keeps every key only once

    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (for example a trailing newline at the end of
                # the file) are not JSON documents; json.loads would raise.
                continue
            # Parse the line as JSON and record its keys.
            structure.update(json.loads(line).keys())

    return structure
20
def extract_key_contents(jsonl_file_path, key_val):
    """Collect the value stored under ``key_val`` on every line of a JSONL file.

    Each non-blank line of the UTF-8 file is parsed as one JSON object.
    Lines whose object does not contain ``key_val`` contribute nothing, so
    the result can be shorter than the number of lines in the file.

    Args:
        jsonl_file_path: Path of the JSON Lines file to read.
        key_val: Top-level key whose values are wanted.

    Returns:
        A list of the values found, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    insert_contents = []

    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (for example a trailing newline at the end of
                # the file) are not JSON documents; json.loads would raise.
                continue
            # Parse the line as JSON.
            json_obj = json.loads(line)
            # Keep the value only when the requested key is present.
            if key_val in json_obj:
                insert_contents.append(json_obj[key_val])

    return insert_contents
33
def check_structure(jsonl_file_path):
    """Print every distinct top-level key found in the given JSONL file.

    The keys come from ``extract_structure``; one ``- key`` line is printed
    per key after a header line.
    """
    found_keys = extract_structure(jsonl_file_path)
    print("JSONLファイルの構造(キーの一覧):")
    for name in found_keys:
        print(f"- {name}")
39
def check_key(jsonl_file_path,key_val):
    """Print every value stored under ``key_val`` in the given JSONL file.

    The values come from ``extract_key_contents``; each one is printed on
    its own line after a header line naming the key.
    """
    values = extract_key_contents(jsonl_file_path, key_val)
    print("JSONLファイル内の",key_val,"キーの中身:")
    for value in values:
        print(value)
45
def get_content(meta, data, keyname, label, lang):
    """Return the ``label`` field of a record in the requested language.

    The field is only looked up when ``label`` is present in ``data`` and
    ``keyname`` is contained in ``meta['type']``. If the requested language
    is missing, the other of 'ja'/'en' is used ('ja' is the fallback for any
    language other than 'ja' itself).

    Args:
        meta: Mapping with a 'type' entry describing the record.
        data: Mapping of field labels to per-language values.
        keyname: Record type that must be contained in ``meta['type']``.
        label: Field to read from ``data``.
        lang: Preferred language key.

    Returns:
        The value in ``lang``, else the value in the fallback language, else
        '' when the field is unavailable or holds neither language.
    """
    if label not in data or keyname not in meta['type']:
        return ''

    localized = data[label]
    if lang in localized:
        return localized[lang]

    fallback = 'en' if lang == 'ja' else 'ja'
    # A field that holds neither language is treated like a missing field
    # instead of raising KeyError.
    return localized[fallback] if fallback in localized else ''
58
def get_content_without_lang(meta, data, keyname, label):
    """Return ``data[label]`` for a record of the requested type.

    The value is returned as stored (no language lookup). An empty string is
    returned when ``label`` is not in ``data`` or ``keyname`` is not
    contained in ``meta['type']``.
    """
    if label not in data:
        return ''
    if keyname not in meta['type']:
        return ''
    return data[label]
65
66
def dump_content_with_lang(meta,data,keyname,lists,lang):
    """Build one row of language-dependent fields per record.

    For every record ``data[i]`` (paired with ``meta[i]``) the fields named
    in ``lists`` are read with ``get_content``. A row is kept only when its
    first field has a length greater than one, so the returned list can be
    shorter than ``data``.
    """
    rows = []
    for index, record in enumerate(data):
        row = [
            get_content(meta[index], record, keyname, label, lang)
            for label in lists
        ]
        # Rows whose leading field is empty (or a single character) are dropped.
        if len(row[0]) > 1:
            rows.append(row)
    return rows
77
def dump_content_without_lang(meta,data,keyname,lists,):
    """Build one row of language-independent fields per record.

    For every record ``data[i]`` (paired with ``meta[i]``) the fields named
    in ``lists`` are read with ``get_content_without_lang``. A row is kept
    only when its first field has a length greater than one, so the returned
    list can be shorter than ``data``.
    """
    rows = []
    for index, record in enumerate(data):
        row = [
            get_content_without_lang(meta[index], record, keyname, label)
            for label in lists
        ]
        # Rows whose leading field is empty (or a single character) are dropped.
        if len(row[0]) > 1:
            rows.append(row)
    return rows
89
# Spellings of the site owner's name that are underlined in author lists.
_OWNER_NAMES = ('F. Endo', 'Fumitaka ENDO', '遠藤 史隆', '遠藤史隆')


def _ordinal_suffix(rank):
    """Return the English ordinal suffix ('st', 'nd', 'rd', 'th') for a 1-based rank."""
    if 10 <= rank % 100 <= 20:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')


def _format_authors(people):
    """Join the 'name' of each person with ', ', underlining the owner.

    The owner's entry becomes ``<u>NAME(N-xx)</u>`` where N is the 1-based
    position in the list. When no entry matches ``_OWNER_NAMES`` the names
    are joined unchanged.
    """
    names = [person['name'] for person in people]

    owner_index = None
    for index, name in enumerate(names):
        if name in _OWNER_NAMES:
            owner_index = index  # the last matching entry wins

    if owner_index is not None:
        rank = owner_index + 1
        names[owner_index] = (
            f"<u>{names[owner_index]}({rank}-{_ordinal_suffix(rank)})</u>"
        )
    return ', '.join(map(str, names))


def _render_entry(title, authors, details):
    """Return the twelve HTML lines of one collapsible list entry.

    Index 2 holds the title line, index 7 the author line and index 9 the
    details line.
    """
    return [
        "\t<p>",
        "\t\t<span class=\"toggle-btn\">",
        f"\t\t\t<b>{title}</b>",
        "\t\t</span>",
        "\t\t<span class=\"dots\">...</span>",
        "\t\t<span class=\"moreContent\">",
        "\t\t\t<br/>",
        f"\t\t\t{authors}",
        "\t\t\t<br/>",
        f"\t\t\t{details}",
        "\t\t</span>",
        "\t</p>",
    ]


def dump_paper_misc_list(meta,data,pmflag):
    """Render every paper-like record of type ``pmflag`` as HTML lines.

    Title, authors and journal name are read with ``get_content`` (English
    preferred); volume and publication date with
    ``get_content_without_lang``. All fields of one entry are taken from the
    same record, so a record without a volume or date cannot shift the
    values of later entries. Records whose title has a length of one or less
    are skipped.

    Args:
        meta: Sequence of metadata mappings, parallel to ``data``.
        data: Sequence of record mappings.
        pmflag: Record type to select (matched against ``meta[i]['type']``
            by the getter functions).

    Returns:
        A list with one list of twelve HTML lines per rendered record. The
        details line reads ``JOURNAL <b>VOLUME</b> (YEAR)``; a missing
        volume or date is rendered as an empty string.
    """
    result_all = []

    for index, record in enumerate(data):
        info = meta[index]
        title = get_content(info, record, pmflag, 'paper_title', 'en')
        if len(title) <= 1:
            # Not a record of the requested type, or it has no usable title.
            continue

        authors = get_content(info, record, pmflag, 'authors', 'en')
        journal = get_content(info, record, pmflag, 'publication_name', 'en')
        volume = get_content_without_lang(info, record, pmflag, 'volume')
        date = get_content_without_lang(info, record, pmflag, 'publication_date')

        volume_text = f"<b>{volume}</b>" if volume else ''
        # Only the first four characters of the date (the year) are shown.
        year_text = f"({date[:4]})" if date else ''

        result_all.append(
            _render_entry(
                title,
                _format_authors(authors),
                f"{journal} {volume_text} {year_text}",
            )
        )

    return result_all


def dump_presentations_list(meta,data,pmflag):
    """Render every presentation record of type ``pmflag`` as HTML lines.

    Title, presenters and event are read with ``get_content`` (English
    preferred); the publication date with ``get_content_without_lang``. All
    fields of one entry are taken from the same record. Records whose title
    has a length of one or less are skipped.

    Args:
        meta: Sequence of metadata mappings, parallel to ``data``.
        data: Sequence of record mappings.
        pmflag: Record type to select (matched against ``meta[i]['type']``
            by the getter functions).

    Returns:
        A list with one list of twelve HTML lines per rendered record. The
        details line reads ``EVENT (YYYY/MM)``; a missing date is rendered
        as an empty string.
    """
    result_all = []

    for index, record in enumerate(data):
        info = meta[index]
        title = get_content(info, record, pmflag, 'presentation_title', 'en')
        if len(title) <= 1:
            # Not a record of the requested type, or it has no usable title.
            continue

        presenters = get_content(info, record, pmflag, 'presenters', 'en')
        event = get_content(info, record, pmflag, 'event', 'en')
        date = get_content_without_lang(info, record, pmflag, 'publication_date')

        # Characters 0-3 and 5-6 of the date are shown as year and month
        # (assumes an ISO-like 'YYYY-MM...' string -- TODO confirm).
        date_text = f"({date[:4]}/{date[5:7]})" if date else ''

        result_all.append(
            _render_entry(
                title,
                _format_authors(presenters),
                f"{event} {date_text}",
            )
        )

    return result_all
250
def separate_paper_mist_1st_author(data):
    """Split rendered entries into first-author and other entries.

    Each entry is a list of HTML lines whose element 7 is the author line.
    The ``<u>``/``</u>`` markup is removed, the line is split on ',' and the
    position of the last part containing one of the owner's name spellings
    is determined. Entries where that position is the first one go to the
    first list; all others, including entries in which the owner does not
    appear at all, go to the second list.

    Args:
        data: List of rendered entries (lists of strings).

    Returns:
        A tuple ``(data_1st, data_other)`` preserving the input order.
    """
    owner_names = ('F. Endo', 'Fumitaka ENDO', '遠藤 史隆', '遠藤史隆')

    data_1st = []
    data_other = []

    for entry in data:
        author_line = entry[7].replace('<u>', '').replace('</u>', '')

        # Reset per entry so a previous entry's position is never reused.
        owner_index = None
        for index, author in enumerate(author_line.split(',')):
            if any(name in author for name in owner_names):
                owner_index = index

        if owner_index == 0:
            data_1st.append(entry)
        else:
            data_other.append(entry)

    return data_1st,data_other
277
278
def _write_numbered_entries(out, entries):
    """Write rendered entries to ``out``, numbering each title line from 1."""
    for number, entry in enumerate(entries, start=1):
        for position, line in enumerate(entry):
            if position == 2:
                # Line 2 is the title: keep its first three characters (the
                # indent) and insert the running number before the rest.
                # NOTE(review): no newline is written after this line, so it
                # shares an output line with the following one; kept as-is.
                out.write(f"{line[:3]}[{number}] {line[3:]}")
            else:
                out.write(f"{line}\n")


def _write_publication_page(path, lang, intro_lines, sections):
    """Write one publication page (front matter plus HTML) to ``path``.

    Args:
        path: Output file; written as UTF-8 and overwritten if it exists.
        lang: Value of the ``lang`` attribute of the ``<html>`` tag.
        intro_lines: Already newline-terminated lines written after ``<body>``.
        sections: Iterable of ``(heading, entries)`` pairs; each heading is
            written as ``<h3>`` followed by its numbered entries.
    """
    # The page contains non-ASCII text, so the encoding is fixed rather than
    # left to the platform default; ``with`` closes the file even on error.
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines([
            "+++\n",
            "archetype = \"theming\"\n",
            "weight = 2 \n",
            "title = \"Publications\"\n",
            "+++\n",
            "\n",
            "<!DOCTYPE html>\n",
            f"<html lang=\"{lang}\">\n",
            "<head>\n",
            "\t<style>\n",
            "\t\t.moreContent {\n",
            "\t\t\tdisplay: none;\n",
            "\t\t}\n",
            "\t\t.btn {\n",
            "\t\t\tcursor: pointer;\n",
            "\t\t\tbackground-color: transparent;\n",
            "\t\t\tborder: none;\n",
            "\t\t\tfont-size: 16px;\n",
            "\t\t}\n",
            "\t</style>\n",
            "</head>\n",
            "<body>\n",
        ])
        out.writelines(intro_lines)

        for heading, entries in sections:
            out.write(f"\t<h3>{heading}</h3>\n")
            _write_numbered_entries(out, entries)

        out.writelines([
            "\t<script src=\"dump_all_info.js\"></script>\n",
            "</body>\n",
            "</html>\n",
        ])


def write_with_hugo_format():
    """Generate the Japanese and English publication pages.

    Reads ``param/publication.toml`` for the input JSONL path and the two
    output paths, extracts the 'merge' and 'insert' values from the input,
    renders papers, MISC items and presentations, splits the papers into
    first-author and other entries, and writes both pages. The two pages
    list the same entries and differ only in the ``lang`` attribute, the
    introduction and the section headings.
    """
    config = read_config("param/publication.toml")
    input_path = config["publication"]["input"].strip()
    output_ja_path = config["publication"]["ja"]["output"].strip()
    output_en_path = config["publication"]["en"]["output"].strip()

    data = extract_key_contents(input_path, 'merge')
    meta = extract_key_contents(input_path, 'insert')

    paper = dump_paper_misc_list(meta, data, 'published_papers')
    misc = dump_paper_misc_list(meta, data, 'misc')
    prese = dump_presentations_list(meta, data, 'presentations')

    p1st, poth = separate_paper_mist_1st_author(paper)

    researchmap_link = (
        "\t<a href=\"https://researchmap.jp/FumitakaENDO\" target=\"_blank\""
        " rel=\"noopener noreferrer\">research map</a>\n"
    )

    _write_publication_page(
        output_ja_path,
        'ja',
        [
            "\t<p>\n",
            researchmap_link,
            "\tをもとに作成した業績リスト\n",
            "\t</p>\n",
            "\t<p>タイトルをクリックすると共著者や雑誌名が表示されます。</p>\n",
        ],
        [
            ("学術雑誌に発表した論文 (筆頭著者)", p1st),
            ("学術雑誌に発表した論文 (共著者)", poth),
            ("一般講演・招待講演", prese),
            ("MISC", misc),
        ],
    )

    _write_publication_page(
        output_en_path,
        'en',
        [
            "\t<p>\n",
            "\tList of achievements created from \n",
            researchmap_link,
            "\t</p>\n",
            "\t<p>Click on the title will display the co-authors, the journal name and so on.</p>\n",
        ],
        [
            ("Major Papers (1st author)", p1st),
            ("Major Papers (co-author)", poth),
            ("Presentation", prese),
            ("MISC", misc),
        ],
    )
441
442
if __name__ == '__main__':
    # Configuration check: load the settings file and show the three paths
    # it defines. The lines visible here only print; they do not generate
    # the pages themselves.
    config = read_config("param/publication.toml")
    publication = config["publication"]
    input_path = publication["input"].strip()
    output_ja_path = publication["ja"]["output"].strip()
    output_en_path = publication["en"]["output"].strip()

    for label, path in (
        ("Input:", input_path),
        ("JA Output:", output_ja_path),
        ("EN Output:", output_en_path),
    ):
        print(label, path)
451 print("EN Output:", output_en_path)