Skip to content

Commit

Permalink
refactor(ggus): 重构 ggus 库
Browse files Browse the repository at this point in the history
Signed-off-by: YdrMaster <ydrml@hotmail.com>
  • Loading branch information
YdrMaster committed Aug 15, 2024
1 parent 0d506b9 commit 27c325b
Show file tree
Hide file tree
Showing 22 changed files with 887 additions and 1,115 deletions.
73 changes: 71 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

79 changes: 59 additions & 20 deletions ggus/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,106 @@
use crate::{pad, GGufFileHeader, GGufMetaKVPairs, GGufReadError, GGufTensors};
use crate::{
pad, GGufFileHeader, GGufMetaKV, GGufReadError, GGufReader, GGufTensorMeta, DEFAULT_ALIGNMENT,
GENERAL_ALIGNMENT,
};
use indexmap::IndexMap;
use std::{error::Error, fmt};

#[derive(Clone)]
pub struct GGuf<'a> {
pub header: GGufFileHeader,
pub meta_kvs: GGufMetaKVPairs<'a>,
pub tensors: GGufTensors<'a>,
pub alignment: usize,
pub meta_kvs: IndexMap<&'a str, GGufMetaKV<'a>>,
pub tensors: IndexMap<&'a str, GGufTensorMeta<'a>>,
pub data: &'a [u8],
}

#[derive(Debug)]
pub enum GGufError {
Reading(GGufReadError),
MagicMismatch,
EndianNotSupport,
VersionNotSupport,
Reading(GGufReadError),
DuplicateMetaKey(String),
DuplicateTensorName(String),
}

impl fmt::Display for GGufError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Reading(e) => write!(f, "reading error: {e:?}"),
Self::MagicMismatch => f.write_str("magic mismatch"),
Self::EndianNotSupport => f.write_str("endian not support"),
Self::VersionNotSupport => f.write_str("version not support"),
Self::Reading(e) => write!(f, "reading error: {e:?}"),
Self::DuplicateMetaKey(key) => write!(f, "duplicate meta key: {key}"),
Self::DuplicateTensorName(name) => write!(f, "duplicate tensor name: {name}"),
}
}
}

impl Error for GGufError {}

impl<'a> GGuf<'a> {
pub fn scan(data: &'a [u8]) -> Result<Self, GGufError> {
let header = unsafe { data.as_ptr().cast::<GGufFileHeader>().read() };
pub fn new(data: &'a [u8]) -> Result<Self, GGufError> {
use GGufError::*;

let mut reader = GGufReader::new(data);

let header = reader.read_header().map_err(Reading)?;
if !header.is_magic_correct() {
return Err(GGufError::MagicMismatch);
return Err(MagicMismatch);
}
if !header.is_native_endian() {
return Err(GGufError::EndianNotSupport);
return Err(EndianNotSupport);
}
if header.version != 3 {
return Err(GGufError::VersionNotSupport);
return Err(VersionNotSupport);
}

let cursor = header.nbytes();
let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..])
.map_err(GGufError::Reading)?;
let mut alignment = DEFAULT_ALIGNMENT;
let mut meta_kvs = IndexMap::with_capacity(header.metadata_kv_count as _);
for _ in 0..header.metadata_kv_count {
let kv = reader.read_meta_kv().map_err(Reading)?;
let k = kv.key();
if k == GENERAL_ALIGNMENT {
alignment = kv.value_reader().read::<u32>().map_err(Reading)? as _;
}
if meta_kvs.insert(k, kv).is_some() {
return Err(DuplicateMetaKey(k.into()));
}
}

let cursor = cursor + meta_kvs.nbytes();
let tensors =
GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?;
let mut data_len = 0;
let mut tensors = IndexMap::with_capacity(header.tensor_count as _);
for _ in 0..header.tensor_count {
let tensor = reader.read_tensor_meta().map_err(Reading)?;
let name = tensor.name();
let info = tensor.to_info();
let end = info.offset() + info.nbytes();
if end > data_len {
data_len = end;
}
if tensors.insert(name, tensor).is_some() {
return Err(DuplicateTensorName(name.into()));
}
}

let cursor = cursor + tensors.nbytes();
let cursor = data.len() - reader.remaining().len();
let padding = if tensors.is_empty() {
0
} else {
pad(cursor, meta_kvs.alignment())
pad(cursor, alignment)
};
reader.skip::<u8>(padding).map_err(Reading)?;
let data = reader.remaining();
if data.len() != data_len {
return Err(Reading(GGufReadError::Eos));
}

Ok(Self {
header,
alignment,
meta_kvs,
tensors,
data: &data[cursor + padding..],
data,
})
}
}
19 changes: 12 additions & 7 deletions ggus/src/header.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::str::Utf8Error;
use crate::{GGufReadError, GGufReader};
use std::str::{from_utf8, Utf8Error};

#[derive(Clone, Default, Debug)]
#[repr(C)]
Expand All @@ -11,6 +12,15 @@ pub struct GGufFileHeader {

const MAGIC: [u8; 4] = *b"GGUF";

impl GGufReader<'_> {
    /// Reads a [`GGufFileHeader`] from the reader's current position and
    /// advances the reader past it.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `skip` when the reader cannot advance
    /// over one header; in that case nothing is read through the pointer.
    #[inline]
    pub fn read_header(&mut self) -> Result<GGufFileHeader, GGufReadError> {
        // Capture the start address before `skip` moves the cursor.
        let ptr = self.remaining().as_ptr().cast::<GGufFileHeader>();
        self.skip::<GGufFileHeader>(1)?;
        // SAFETY: `skip` succeeded, which presumably means at least
        // `size_of::<GGufFileHeader>()` bytes were available at `ptr`
        // (NOTE(review): confirm `skip` bounds-checks). The pointer comes from
        // a byte slice, so it carries no alignment guarantee for the header
        // type; `read_unaligned` is used because `read` would be undefined
        // behaviour on a misaligned address.
        Ok(unsafe { ptr.read_unaligned() })
    }
}

impl GGufFileHeader {
#[inline]
pub const fn new(version: u32, tensor_count: u64, metadata_kv_count: u64) -> Self {
Expand Down Expand Up @@ -39,11 +49,6 @@ impl GGufFileHeader {

#[inline]
pub const fn magic(&self) -> Result<&str, Utf8Error> {
std::str::from_utf8(&self.magic)
}

#[inline]
pub const fn nbytes(&self) -> usize {
size_of::<Self>()
from_utf8(&self.magic)
}
}
11 changes: 6 additions & 5 deletions ggus/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#![doc = include_str!("../README.md")]
#![deny(warnings)]
// #![deny(warnings)]

mod file;
mod header;
Expand All @@ -12,13 +12,14 @@ mod write;
pub use file::{GGuf, GGufError};
pub use header::GGufFileHeader;
pub use metadata::{
utok, GGufArray, GGufFileType, GGufMetaDataValueType, GGufMetaKV, GGufMetaKVPairs,
GGufTokenType, DEFAULT_ALIGNMENT, GENERAL_ALIGNMENT,
GGufFileType, GGufMetaDataValueType, GGufMetaKV, DEFAULT_ALIGNMENT, GENERAL_ALIGNMENT,
};
pub use name::GGufFileName;
pub use read::{GGufReadError, GGufReader};
pub use tensor::{GGmlType, GGufTensorInfo, GGufTensors};
pub use write::{GGufMetaWriter, GGufSimulator, GGufTensorWriter};
pub use tensor::{GGmlType, GGufTensorInfo, GGufTensorMeta};
pub use write::{
DataFuture, GGufFileSimulator, GGufFileWriter, GGufTensorSimulator, GGufTensorWriter,
};

#[inline(always)]
const fn pad(pos: usize, align: usize) -> usize {
Expand Down
77 changes: 0 additions & 77 deletions ggus/src/metadata/general.rs

This file was deleted.

Loading

0 comments on commit 27c325b

Please sign in to comment.