davda54 committed
Commit 4620a93
1 Parent(s): beeda4c

Update README.md

Files changed (1)
  1. README.md +4 -3
README.md CHANGED
@@ -141,13 +141,14 @@ llm = Llama.from_pretrained(
     filename="*Q4_K_M.gguf", # suffix of the filename containing the level of quantization.
     n_ctx=32768, # The max sequence length to use - note that longer sequence lengths require much more resources
     n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
-    n_gpu_layers=16 # The number of layers to offload to GPU, if you have GPU acceleration available
+    n_gpu_layers=35 # The number of layers to offload to GPU, if you have GPU acceleration available
 )
 
 # Simple inference example
 output = llm(
     """<s><|im_start|> user
-Hva kan jeg bruke einstape til?<|im_end|><|im_start|> assitant
+Hva kan jeg bruke einstape til?<|im_end|>
+<|im_start|> assistant
 """, # Prompt
     max_tokens=512, # Generate up to 512 tokens
     stop=["<|im_end|>"], # Example stop token
@@ -161,7 +162,7 @@ llm.create_chat_completion(
     messages = [
         {
             "role": "user",
-            "content": Hva kan jeg bruke einstape til?"
+            "content": "Hva kan jeg bruke einstape til?"
         }
     ]
 )
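
For context, below is a minimal end-to-end sketch of the README example as it reads after this commit, which bumps n_gpu_layers from 16 to 35, splits the user and assistant turns onto separate lines, fixes the "assitant" typo, and restores the missing opening quote in the chat-completion content string. This is a reconstruction, not the full README: the hunks never show the opening of the from_pretrained(...) call, so the repo_id value below is a placeholder, and the import and print lines are added for completeness. The Norwegian prompt "Hva kan jeg bruke einstape til?" means "What can I use bracken (einstape) for?".

from llama_cpp import Llama

# NOTE: repo_id is a placeholder; the hunks in this commit do not show
# the opening lines of the from_pretrained(...) call.
llm = Llama.from_pretrained(
    repo_id="your-org/your-model-GGUF",  # placeholder, not from the diff
    filename="*Q4_K_M.gguf",  # suffix of the filename containing the level of quantization
    n_ctx=32768,       # max sequence length; longer lengths require much more memory
    n_threads=8,       # CPU threads, tailor to your system
    n_gpu_layers=35,   # layers to offload to GPU, if acceleration is available
)

# Simple inference with the corrected prompt template:
# the user turn and the assistant turn now sit on separate lines.
output = llm(
    """<s><|im_start|> user
Hva kan jeg bruke einstape til?<|im_end|>
<|im_start|> assistant
""",
    max_tokens=512,        # generate up to 512 tokens
    stop=["<|im_end|>"],   # stop at the end-of-turn token
)
print(output["choices"][0]["text"])

# Chat-completion interface; the content string now has both quotes.
response = llm.create_chat_completion(
    messages=[
        {"role": "user", "content": "Hva kan jeg bruke einstape til?"}
    ]
)
print(response["choices"][0]["message"]["content"])

Llama.from_pretrained matches the filename pattern against the files in the Hub repository and downloads the first match, so the *Q4_K_M.gguf glob selects the 4-bit medium-quality quantization referenced throughout the diff.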