Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Clark Lin
jupyter-genai
Commits
0176e840
Commit
0176e840
authored
Nov 25, 2024
by
Administrator
Browse files
Added QA LLM to the retrieve script
parent
7f1e6463
Changes
2
Hide whitespace changes
Inline
Side-by-side
notebooks/embed-with-hf-local.ipynb
View file @
0176e840
...
...
@@ -150,26 +150,52 @@
metadatas
=
[{
"chunk"
:
idx
}],
embeddings
=
[
doc
]
)
```
%%%% Output: display_data
%% Cell type:code id:e7d24e7f tags:
```
python
# Free the embedding model before moving on to the next stage.
import gc

del model
gc.collect()
```
%%%% Output: execute_result
%% Cell type:code id:ac99b839 tags:
```
python
# Imports for dimensionality reduction and interactive plotting.
import umap
import matplotlib.pyplot as plt
import numpy as np
import mplcursors

# Build a UMAP transformer and project the document embeddings down to 2-D.
# NOTE(review): `embedded_docs` comes from an earlier cell — presumably an
# (n_docs, dim) array of embedding vectors; confirm against that cell.
reducer = umap.UMAP()
embedding = reducer.fit_transform(embedded_docs)
```
%% Cell type:code id:084d21af tags:
```
python
# Scatter-plot the 2-D UMAP projection, one point per document.
scatter = plt.scatter(embedding[:, 0], embedding[:, 1], s=10)

# Attach interactive hover labels showing the source document text for the
# hovered point. NOTE(review): assumes `request_docs` (defined in an earlier
# cell) is indexable by the scatter point index — confirm.
cursor = mplcursors.cursor(scatter, hover=True)
cursor.connect(
    "add",
    lambda sel: sel.annotation.set_text(request_docs[sel.index]),
)

# Equal axis scaling so distances in the projection are not distorted.
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP Projection', fontsize=24)

# Render the figure.
plt.show()
```
%% Cell type:code id:61d5bbbe tags:
```
python
```
...
...
notebooks/retrieve-with-hf-local.ipynb
View file @
0176e840
...
...
@@ -3,11 +3,11 @@
```
python
## Preparation
# Name of the embedding collection to query.
embed_object = "labors-law"

# User question to answer. The scraped diff captured both the old and the new
# value of this line; the superseded assignment
# ("对于劳动合同的解除,有什么规定?") was a dead store immediately overwritten,
# so only the final value is kept. NOTE(review): string reconstructed from a
# line-wrapped scrape — confirm exact wording against the notebook.
query_text = "关于工资福利,有什么规定?"
```
%% Cell type:code id:4b9d9fa0-1c73-4cef-abc4-397458215159 tags:
```
python
...
...
@@ -110,8 +110,61 @@
# Release the embedding model's memory before loading the QA model.
import gc

del model
gc.collect()
```
%%
%% Output: execute_result
%%
Cell type:code id:4b9a1869 tags:
28863
```
python
# Load the QA (answer-generation) model and its tokenizer.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-1.5B-Instruct"

# Let transformers pick the dtype and device placement automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
```
%% Cell type:code id:902860e9 tags:
```
python
# Build the chat prompt: system persona, retrieved context, then the question.
prompt = query_text

# Retrieved passages are injected as an instruction plus a bulleted list.
# NOTE(review): the context is sent as a *second* "system" message — confirm
# this is intentional rather than a "user" turn.
context_block = (
    "在回答的过程中,必须根据以下提示回答:\n"
    + "\n- ".join(results["documents"][0])
)

messages = [
    {
        "role": "system",
        "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
    },
    {"role": "system", "content": context_block},
    {"role": "user", "content": prompt},
]

# Render the chat template to plain text, then tokenize onto the model device.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
```
%% Cell type:code id:f4aa0f73 tags:
```
python
# Generate the answer (up to 512 new tokens).
generated_ids = model.generate(**model_inputs, max_new_tokens=512)

# Strip the prompt tokens from each sequence so only newly generated text
# remains.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

# Decode the first (and only) sequence and print the answer.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
```
%% Cell type:code id:f2baa5f5 tags:
```
python
```
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment