Skip to content

Commit 6c41062

Browse files
author
blmoistawinde
committed
V0.3
1 parent 03dc741 commit 6c41062

130 files changed

Lines changed: 18 additions & 13817 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ HarvestText是一个基于少量种子词和背景知识完成一些领域自适
1616
## 依赖
1717
- jieba
1818
- numpy, pandas
19-
- networkx(可选)
19+
- networkx
2020

2121
## 用法
2222

@@ -258,11 +258,6 @@ from harvesttext import loadHT,saveHT
258258
para = "上港的武磊和恒大的郜林,谁是中国最好的前锋?那当然是武磊武球王了,他是射手榜第一,原来是弱点的单刀也有了进步"
259259
saveHT(ht,"ht_model1")
260260
ht2 = loadHT("ht_model1")
261-
print("cut with loaded model")
262-
print(ht2.seg(para))
263-
ht2.clear()
264-
print("cut with cleared model")
265-
print(ht2.seg(para))
266261

267262
# 消除记录
268263
ht2.clear()

examples/ht_model1

0 Bytes
Binary file not shown.

harvesttext/resources.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88
# 此资源被用于以下论文中:
99
# Jun Li and Maosong Sun, Experimental Study on Sentiment Classification of Chinese Review using Machine Learning Techniques, in Proceedings of IEEE NLPKE 2007
1010
# 李军 中文评论的褒贬义分类实验研究 硕士论文 清华大学 2008
11+
import os
12+
import json
1113

1214
def get_qh_sent_dict():
1315
"""
@@ -19,25 +21,20 @@ def get_qh_sent_dict():
1921
李军 中文评论的褒贬义分类实验研究 硕士论文 清华大学 2008
2022
:return: qh_sent_dict = {"pos":[words],"neg":[words]}
2123
"""
22-
qh_sent_dict = {"pos":[],"neg":[]}
23-
with open("../resources/sentiment.dict.v1.0/tsinghua.positive.gb.txt","r") as f:
24-
qh_sent_dict["pos"] = f.read().split()
2524

26-
with open("../resources/sentiment.dict.v1.0/tsinghua.negative.gb.txt","r") as f:
27-
qh_sent_dict["neg"] = f.read().split()
25+
pwd = os.path.abspath(os.path.dirname(__file__))
26+
with open(pwd+"/resources/qh_sent_dict.json","r",encoding="utf-8") as f:
27+
qh_sent_dict = json.load(f)
2828
return qh_sent_dict
2929

3030
def get_sanguo():
3131
"""
3232
获得三国演义原文
3333
:return: ["章节1文本","章节2文本",...]
3434
"""
35-
import os
36-
basedir = "../resources/三国演义/"
37-
docs = ["" for i in range(120)]
38-
for i in range(1,121):
39-
with open(basedir + f"{i}.txt", "r" ,encoding="utf-8") as f:
40-
docs[i-1] = f.read().strip()
35+
pwd = os.path.abspath(os.path.dirname(__file__))
36+
with open(pwd+"/resources/sanguo_docs.json","r",encoding="utf-8") as f:
37+
docs = json.load(f)
4138
return docs
4239

4340
def get_sanguo_entity_dict():
@@ -47,7 +44,9 @@ def get_sanguo_entity_dict():
4744
:return: entity_mention_dict,entity_type_dict
4845
"""
4946
import json
50-
with open("../resources/sanguo_entity_dict.json","r",encoding="utf-8") as f:
47+
pwd = os.path.abspath(os.path.dirname(__file__))
48+
with open(pwd+"/resources/sanguo_entity_dict.json","r",encoding="utf-8") as f:
5149
entity_dict = json.load(f)
5250
return entity_dict["mention"], entity_dict["type"]
5351

52+

harvesttext/resources/qh_sent_dict.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

harvesttext/resources/sanguo_docs.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
File renamed without changes.

resources/sentiment.dict.v1.0/readme.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)